普通文本  |  220行  |  6.96 KB

// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/pattern.h"

#include "tools/gn/value.h"

const char kPattern_Help[] =
    "Patterns\n"
    "  Patterns are VERY limited regular expressions that are used in\n"
    "  several places.\n"
    "\n"
    "  Patterns must match the entire input string to be counted as a match.\n"
    "  In regular expression parlance, there is an implicit \"^...$\"\n"
    "  surrounding your input. If you want to match a substring, you need to\n"
    "  use wildcards at the beginning and end.\n"
    "\n"
    "  There are only two special tokens understood by the pattern matcher.\n"
    "  Everything else is a literal.\n"
    "\n"
    "   * Matches zero or more of any character. It does not depend on the\n"
    "     preceeding character (in regular expression parlance it is\n"
    "     equivalent to \".*\").\n"
    "\n"
    "  \\b Matches a path boundary. This will match the beginning or end of\n"
    "     a string, or a slash.\n"
    "\n"
    "Examples\n"
    "\n"
    "  \"*asdf*\"\n"
    "      Matches a string containing \"asdf\" anywhere.\n"
    "\n"
    "  \"asdf\"\n"
    "      Matches only the exact string \"asdf\".\n"
    "\n"
    "  \"*.cc\"\n"
    "      Matches strings ending in the literal \".cc\".\n"
    "\n"
    "  \"\\bwin/*\"\n"
    "      Matches \"win/foo\" and \"foo/win/bar.cc\" but not \"iwin/foo\".\n";

namespace {

void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
  // Set when the last subrange is a literal so we can just append when we
  // find another literal.
  Pattern::Subrange* last_literal = NULL;

  for (size_t i = 0; i < s.size(); i++) {
    if (s[i] == '*') {
      // Don't allow two **.
      if (out->size() == 0 ||
          (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
        out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
      last_literal = NULL;
    } else if (s[i] == '\\') {
      if (i < s.size() - 1 && s[i + 1] == 'b') {
        // "\b" means path boundary.
        i++;
        out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
        last_literal = NULL;
      } else {
        // Backslash + anything else means that literal char.
        if (!last_literal) {
          out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
          last_literal = &(*out)[out->size() - 1];
        }
        if (i < s.size() - 1) {
          i++;
          last_literal->literal.push_back(s[i]);
        } else {
          // Single backslash at end, use literal backslash.
          last_literal->literal.push_back('\\');
        }
      }
    } else {
      if (!last_literal) {
        out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
        last_literal = &(*out)[out->size() - 1];
      }
      last_literal->literal.push_back(s[i]);
    }
  }
}

}  // namespace

Pattern::Pattern(const std::string& s) {
  ParsePattern(s, &subranges_);
  is_suffix_ =
      (subranges_.size() == 2 &&
       subranges_[0].type == Subrange::ANYTHING &&
       subranges_[1].type == Subrange::LITERAL);
}

Pattern::~Pattern() {
}

bool Pattern::MatchesString(const std::string& s) const {
  // Empty pattern matches only empty string.
  if (subranges_.empty())
    return s.empty();

  if (is_suffix_) {
    const std::string& suffix = subranges_[1].literal;
    if (suffix.size() > s.size())
      return false;  // Too short.
    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
  }

  return RecursiveMatch(s, 0, 0, true);
}

// We assume the number of ranges is small so recursive is always reasonable.
// Could be optimized to only be recursive for *.
bool Pattern::RecursiveMatch(const std::string& s,
                             size_t begin_char,
                             size_t subrange_index,
                             bool allow_implicit_path_boundary) const {
  if (subrange_index >= subranges_.size()) {
    // Hit the end of our subranges, the text should also be at the end for a
    // match.
    return begin_char == s.size();
  }

  const Subrange& sr = subranges_[subrange_index];
  switch (sr.type) {
    case Subrange::LITERAL: {
      if (s.size() - begin_char < sr.literal.size())
        return false;  // Not enough room.
      if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
        return false;  // Literal doesn't match.

      // Recursively check the next one.
      return RecursiveMatch(s, begin_char + sr.literal.size(),
                            subrange_index + 1, true);
    }

    case Subrange::PATH_BOUNDARY: {
      // When we can accept an implicit path boundary, we have to check both
      // a match of the literal and the implicit one.
      if (allow_implicit_path_boundary &&
          (begin_char == 0 || begin_char == s.size())) {
        // At implicit path boundary, see if the rest of the pattern matches.
        if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
          return true;
      }

      // Check for a literal "/".
      if (begin_char < s.size() && s[begin_char] == '/') {
        // At explicit boundary, see if the rest of the pattern matches.
        if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
          return true;
      }
      return false;
    }

    case Subrange::ANYTHING: {
      if (subrange_index == subranges_.size() - 1)
        return true;  // * at the end, consider it matching.

      size_t min_next_size = sr.MinSize();

      // We don't care about exactly what matched as long as there was a match,
      // so we can do this front-to-back. If we needed the match, we would
      // normally want "*" to be greedy so would work backwards.
      for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
        // Note: this could probably be faster by detecting the type of the
        // next match in advance and checking for a match in this loop rather
        // than doing a full recursive call for each character.
        if (RecursiveMatch(s, i, subrange_index + 1, true))
          return true;
      }
      return false;
    }

    default:
      NOTREACHED();
  }

  return false;
}

PatternList::PatternList() {
}

PatternList::~PatternList() {
}

void PatternList::SetFromValue(const Value& v, Err* err) {
  patterns_.clear();

  if (v.type() != Value::LIST) {
    *err = Err(v.origin(), "This value must be a list.");
    return;
  }

  const std::vector<Value>& list = v.list_value();
  for (size_t i = 0; i < list.size(); i++) {
    if (!list[i].VerifyTypeIs(Value::STRING, err))
      return;
    patterns_.push_back(Pattern(list[i].string_value()));
  }
}

bool PatternList::MatchesString(const std::string& s) const {
  for (size_t i = 0; i < patterns_.size(); i++) {
    if (patterns_[i].MatchesString(s))
      return true;
  }
  return false;
}

bool PatternList::MatchesValue(const Value& v) const {
  if (v.type() == Value::STRING)
    return MatchesString(v.string_value());
  return false;
}