diff options
Diffstat (limited to 'utils/FileCheck')
| -rw-r--r-- | utils/FileCheck/FileCheck.cpp | 286 | 
1 files changed, 212 insertions, 74 deletions
| diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index e79162867eba..b0ef67ac88ba 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -17,17 +17,21 @@  //===----------------------------------------------------------------------===//  #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/MemoryBuffer.h"  #include "llvm/Support/PrettyStackTrace.h"  #include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h"  #include "llvm/Support/SourceMgr.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Signals.h"  #include "llvm/Support/system_error.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h"  #include <algorithm> +#include <map> +#include <string> +#include <vector>  using namespace llvm;  static cl::opt<std::string> @@ -63,23 +67,29 @@ class Pattern {    /// RegEx - If non-empty, this is a regex pattern.    std::string RegExStr; +  /// \brief Contains the number of line this pattern is in. +  unsigned LineNumber; +    /// VariableUses - Entries in this vector map to uses of a variable in the    /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain    /// "foobaz" and we'll get an entry in this vector that tells us to insert the    /// value of bar at offset 3.    std::vector<std::pair<StringRef, unsigned> > VariableUses; -  /// VariableDefs - Entries in this vector map to definitions of a variable in -  /// the pattern, e.g. "foo[[bar:.*]]baz".  In this case, the RegExStr will -  /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1.  The -  /// index indicates what parenthesized value captures the variable value. -  std::vector<std::pair<StringRef, unsigned> > VariableDefs; +  /// VariableDefs - Maps definitions of variables to their parenthesized +  /// capture numbers. +  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. +  std::map<StringRef, unsigned> VariableDefs;  public:    Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { } -  bool ParsePattern(StringRef PatternStr, SourceMgr &SM); +  /// ParsePattern - Parse the given string into the Pattern.  SM provides the +  /// SourceMgr used for error reports, and LineNumber is the line number in +  /// the input file from which the pattern string was read. +  /// Returns true in case of an error, false otherwise. +  bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);    /// Match - Match the pattern string against the input buffer Buffer.  This    /// returns the position that is matched or npos if there is no match.  If @@ -97,17 +107,31 @@ public:  private:    static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr); -  bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM); +  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); +  void AddBackrefToRegEx(unsigned BackrefNum);    /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of    /// matching this pattern at the start of \arg Buffer; a distance of zero    /// should correspond to a perfect match.    unsigned ComputeMatchDistance(StringRef Buffer,                                 const StringMap<StringRef> &VariableTable) const; + +  /// \brief Evaluates expression and stores the result to \p Value. +  /// \return true on success. false when the expression has invalid syntax. +  bool EvaluateExpression(StringRef Expr, std::string &Value) const; + +  /// \brief Finds the closing sequence of a regex variable usage or +  /// definition. Str has to point in the beginning of the definition +  /// (right after the opening sequence). +  /// \return offset of the closing sequence within Str, or npos if it was not +  /// found. +  size_t FindRegexVarEnd(StringRef Str);  }; -bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { +bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM, +                           unsigned LineNumber) { +  this->LineNumber = LineNumber;    PatternLoc = SMLoc::getFromPointer(PatternStr.data());    // Ignore trailing whitespace. @@ -140,8 +164,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {    while (!PatternStr.empty()) {      // RegEx matches.      if (PatternStr.startswith("{{")) { - -      // Otherwise, this is the start of a regex match.  Scan for the }}. +      // This is the start of a regex match.  Scan for the }}.        size_t End = PatternStr.find("}}");        if (End == StringRef::npos) {          SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), @@ -171,8 +194,10 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {      // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject      // it.  This is to catch some common errors.      if (PatternStr.startswith("[[")) { -      // Verify that it is terminated properly. -      size_t End = PatternStr.find("]]"); +      // Find the closing bracket pair ending the match.  End is going to be an +      // offset relative to the beginning of the match string. +      size_t End = FindRegexVarEnd(PatternStr.substr(2)); +        if (End == StringRef::npos) {          SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),                          SourceMgr::DK_Error, @@ -180,8 +205,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {          return true;        } -      StringRef MatchStr = PatternStr.substr(2, End-2); -      PatternStr = PatternStr.substr(End+2); +      StringRef MatchStr = PatternStr.substr(2, End); +      PatternStr = PatternStr.substr(End+4);        // Get the regex name (e.g. "foo").        size_t NameEnd = MatchStr.find(':'); @@ -193,16 +218,31 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {          return true;        } -      // Verify that the name is well formed. -      for (unsigned i = 0, e = Name.size(); i != e; ++i) -        if (Name[i] != '_' && !isalnum(Name[i])) { +      // Verify that the name/expression is well formed. FileCheck currently +      // supports @LINE, @LINE+number, @LINE-number expressions. The check here +      // is relaxed, more strict check is performed in \c EvaluateExpression. +      bool IsExpression = false; +      for (unsigned i = 0, e = Name.size(); i != e; ++i) { +        if (i == 0 && Name[i] == '@') { +          if (NameEnd != StringRef::npos) { +            SM.PrintMessage(SMLoc::getFromPointer(Name.data()), +                            SourceMgr::DK_Error, +                            "invalid name in named regex definition"); +            return true; +          } +          IsExpression = true; +          continue; +        } +        if (Name[i] != '_' && !isalnum(Name[i]) && +            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {            SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),                            SourceMgr::DK_Error, "invalid name in named regex");            return true;          } +      }        // Name can't start with a digit. -      if (isdigit(Name[0])) { +      if (isdigit(static_cast<unsigned char>(Name[0]))) {          SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,                          "invalid name in named regex");          return true; @@ -210,12 +250,25 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {        // Handle [[foo]].        if (NameEnd == StringRef::npos) { -        VariableUses.push_back(std::make_pair(Name, RegExStr.size())); +        // Handle variables that were defined earlier on the same line by +        // emitting a backreference. +        if (VariableDefs.find(Name) != VariableDefs.end()) { +          unsigned VarParenNum = VariableDefs[Name]; +          if (VarParenNum < 1 || VarParenNum > 9) { +            SM.PrintMessage(SMLoc::getFromPointer(Name.data()), +                            SourceMgr::DK_Error, +                            "Can't back-reference more than 9 variables"); +            return true; +          } +          AddBackrefToRegEx(VarParenNum); +        } else { +          VariableUses.push_back(std::make_pair(Name, RegExStr.size())); +        }          continue;        }        // Handle [[foo:.*]]. -      VariableDefs.push_back(std::make_pair(Name, CurParen)); +      VariableDefs[Name] = CurParen;        RegExStr += '(';        ++CurParen; @@ -231,7 +284,6 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {      FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));      AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);      PatternStr = PatternStr.substr(FixedMatchEnd); -    continue;    }    return false; @@ -264,21 +316,46 @@ void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {    }  } -bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, +bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,                                SourceMgr &SM) { -  Regex R(RegexStr); +  Regex R(RS);    std::string Error;    if (!R.isValid(Error)) { -    SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), SourceMgr::DK_Error, +    SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,                      "invalid regex: " + Error);      return true;    } -  RegExStr += RegexStr.str(); +  RegExStr += RS.str();    CurParen += R.getNumMatches();    return false;  } +void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { +  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); +  std::string Backref = std::string("\\") + +                        std::string(1, '0' + BackrefNum); +  RegExStr += Backref; +} + +bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { +  // The only supported expression is @LINE([\+-]\d+)? +  if (!Expr.startswith("@LINE")) +    return false; +  Expr = Expr.substr(StringRef("@LINE").size()); +  int Offset = 0; +  if (!Expr.empty()) { +    if (Expr[0] == '+') +      Expr = Expr.substr(1); +    else if (Expr[0] != '-') +      return false; +    if (Expr.getAsInteger(10, Offset)) +      return false; +  } +  Value = llvm::itostr(LineNumber + Offset); +  return true; +} +  /// Match - Match the pattern string against the input buffer Buffer.  This  /// returns the position that is matched or npos if there is no match.  If  /// there is a match, the size of the matched string is returned in MatchLen. @@ -307,15 +384,21 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,      unsigned InsertOffset = 0;      for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { -      StringMap<StringRef>::iterator it = -        VariableTable.find(VariableUses[i].first); -      // If the variable is undefined, return an error. -      if (it == VariableTable.end()) -        return StringRef::npos; - -      // Look up the value and escape it so that we can plop it into the regex.        std::string Value; -      AddFixedStringToRegEx(it->second, Value); + +      if (VariableUses[i].first[0] == '@') { +        if (!EvaluateExpression(VariableUses[i].first, Value)) +          return StringRef::npos; +      } else { +        StringMap<StringRef>::iterator it = +          VariableTable.find(VariableUses[i].first); +        // If the variable is undefined, return an error. +        if (it == VariableTable.end()) +          return StringRef::npos; + +        // Look up the value and escape it so that we can plop it into the regex. +        AddFixedStringToRegEx(it->second, Value); +      }        // Plop it into the regex at the adjusted offset.        TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, @@ -337,10 +420,11 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,    StringRef FullMatch = MatchInfo[0];    // If this defines any variables, remember their values. -  for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { -    assert(VariableDefs[i].second < MatchInfo.size() && -           "Internal paren error"); -    VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; +  for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), +                                                     E = VariableDefs.end(); +       I != E; ++I) { +    assert(I->second < MatchInfo.size() && "Internal paren error"); +    VariableTable[I->first] = MatchInfo[I->second];    }    MatchLen = FullMatch.size(); @@ -371,19 +455,31 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,    // variable values.    if (!VariableUses.empty()) {      for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { -      StringRef Var = VariableUses[i].first; -      StringMap<StringRef>::const_iterator it = VariableTable.find(Var);        SmallString<256> Msg;        raw_svector_ostream OS(Msg); - -      // Check for undefined variable references. -      if (it == VariableTable.end()) { -        OS << "uses undefined variable \""; -        OS.write_escaped(Var) << "\"";; +      StringRef Var = VariableUses[i].first; +      if (Var[0] == '@') { +        std::string Value; +        if (EvaluateExpression(Var, Value)) { +          OS << "with expression \""; +          OS.write_escaped(Var) << "\" equal to \""; +          OS.write_escaped(Value) << "\""; +        } else { +          OS << "uses incorrect expression \""; +          OS.write_escaped(Var) << "\""; +        }        } else { -        OS << "with variable \""; -        OS.write_escaped(Var) << "\" equal to \""; -        OS.write_escaped(it->second) << "\""; +        StringMap<StringRef>::const_iterator it = VariableTable.find(Var); + +        // Check for undefined variable references. +        if (it == VariableTable.end()) { +          OS << "uses undefined variable \""; +          OS.write_escaped(Var) << "\""; +        } else { +          OS << "with variable \""; +          OS.write_escaped(Var) << "\" equal to \""; +          OS.write_escaped(it->second) << "\""; +        }        }        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, @@ -432,6 +528,40 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,    }  } +size_t Pattern::FindRegexVarEnd(StringRef Str) { +  // Offset keeps track of the current offset within the input Str +  size_t Offset = 0; +  // [...] Nesting depth +  size_t BracketDepth = 0; + +  while (!Str.empty()) { +    if (Str.startswith("]]") && BracketDepth == 0) +      return Offset; +    if (Str[0] == '\\') { +      // Backslash escapes the next char within regexes, so skip them both. +      Str = Str.substr(2); +      Offset += 2; +    } else { +      switch (Str[0]) { +        default: +          break; +        case '[': +          BracketDepth++; +          break; +        case ']': +          assert(BracketDepth > 0 && "Invalid regex"); +          BracketDepth--; +          break; +      } +      Str = Str.substr(1); +      Offset++; +    } +  } + +  return StringRef::npos; +} + +  //===----------------------------------------------------------------------===//  // Check Strings.  //===----------------------------------------------------------------------===// @@ -457,9 +587,13 @@ struct CheckString {      : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}  }; -/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified -/// memory buffer, free it, and return a new one. -static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { +/// Canonicalize whitespaces in the input file. Line endings are replaced +/// with UNIX-style '\n'. +/// +/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace +/// characters to a single space. +static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB, +                                           bool PreserveHorizontal) {    SmallString<128> NewFile;    NewFile.reserve(MB->getBufferSize()); @@ -470,8 +604,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {        continue;      } -    // If current char is not a horizontal whitespace, dump it to output as is. -    if (*Ptr != ' ' && *Ptr != '\t') { +    // If current char is not a horizontal whitespace or if horizontal  +    // whitespace canonicalization is disabled, dump it to output as is. +    if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {        NewFile.push_back(*Ptr);        continue;      } @@ -494,9 +629,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {  /// ReadCheckFile - Read the check file, which specifies the sequence of  /// expected strings.  The strings are added to the CheckStrings vector. +/// Returns true in case of an error, false otherwise.  static bool ReadCheckFile(SourceMgr &SM,                            std::vector<CheckString> &CheckStrings) { -  // Open the check file, and tell SourceMgr about it.    OwningPtr<MemoryBuffer> File;    if (error_code ec =          MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) { @@ -504,28 +639,33 @@ static bool ReadCheckFile(SourceMgr &SM,             << ec.message() << '\n';      return true;    } -  MemoryBuffer *F = File.take();    // If we want to canonicalize whitespace, strip excess whitespace from the -  // buffer containing the CHECK lines. -  if (!NoCanonicalizeWhiteSpace) -    F = CanonicalizeInputFile(F); +  // buffer containing the CHECK lines. Remove DOS style line endings. +  MemoryBuffer *F = +    CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);    SM.AddNewSourceBuffer(F, SMLoc());    // Find all instances of CheckPrefix followed by : in the file.    StringRef Buffer = F->getBuffer(); -    std::vector<std::pair<SMLoc, Pattern> > NotMatches; +  // LineNumber keeps track of the line on which CheckPrefix instances are +  // found. +  unsigned LineNumber = 1; +    while (1) {      // See if Prefix occurs in the memory buffer. -    Buffer = Buffer.substr(Buffer.find(CheckPrefix)); - +    size_t PrefixLoc = Buffer.find(CheckPrefix);      // If we didn't find a match, we're done. -    if (Buffer.empty()) +    if (PrefixLoc == StringRef::npos)        break; +    LineNumber += Buffer.substr(0, PrefixLoc).count('\n'); + +    Buffer = Buffer.substr(PrefixLoc); +      const char *CheckPrefixStart = Buffer.data();      // When we find a check prefix, keep track of whether we find CHECK: or @@ -560,12 +700,11 @@ static bool ReadCheckFile(SourceMgr &SM,      // Parse the pattern.      Pattern P; -    if (P.ParsePattern(Buffer.substr(0, EOL), SM)) +    if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))        return true;      Buffer = Buffer.substr(EOL); -      // Verify that CHECK-NEXT lines have at least one CHECK line before them.      if (IsCheckNext && CheckStrings.empty()) {        SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), @@ -582,7 +721,6 @@ static bool ReadCheckFile(SourceMgr &SM,        continue;      } -      // Okay, add the string we captured to the output vector and move on.      CheckStrings.push_back(CheckString(P,                                         PatternLoc, @@ -663,18 +801,18 @@ int main(int argc, char **argv) {          MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {      errs() << "Could not open input file '" << InputFilename << "': "             << ec.message() << '\n'; -    return true; +    return 2;    } -  MemoryBuffer *F = File.take(); -  if (F->getBufferSize() == 0) { +  if (File->getBufferSize() == 0) {      errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; -    return 1; +    return 2;    } -   +    // Remove duplicate spaces in the input file if requested. -  if (!NoCanonicalizeWhiteSpace) -    F = CanonicalizeInputFile(F); +  // Remove DOS style line endings. +  MemoryBuffer *F = +    CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);    SM.AddNewSourceBuffer(F, SMLoc()); | 
