diff options
Diffstat (limited to 'contrib/llvm/lib/Support/Regex.cpp')
| -rw-r--r-- | contrib/llvm/lib/Support/Regex.cpp | 205 | 
1 files changed, 205 insertions, 0 deletions
| diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp new file mode 100644 index 000000000000..b1087fd8853c --- /dev/null +++ b/contrib/llvm/lib/Support/Regex.cpp @@ -0,0 +1,205 @@ +//===-- Regex.cpp - Regular Expression matcher implementation -------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Regex.h" +#include "regex_impl.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include <string> +using namespace llvm; + +Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} + +Regex::Regex(StringRef regex, unsigned Flags) { +  unsigned flags = 0; +  preg = new llvm_regex(); +  preg->re_endp = regex.end(); +  if (Flags & IgnoreCase)  +    flags |= REG_ICASE; +  if (Flags & Newline) +    flags |= REG_NEWLINE; +  if (!(Flags & BasicRegex)) +    flags |= REG_EXTENDED; +  error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); +} + +Regex::Regex(Regex &®ex) { +  preg = regex.preg; +  error = regex.error; +  regex.preg = nullptr; +  regex.error = REG_BADPAT; +} + +Regex::~Regex() { +  if (preg) { +    llvm_regfree(preg); +    delete preg; +  } +} + +bool Regex::isValid(std::string &Error) const { +  if (!error) +    return true; +   +  size_t len = llvm_regerror(error, preg, nullptr, 0); +   +  Error.resize(len - 1); +  llvm_regerror(error, preg, &Error[0], len); +  return false; +} + +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { +  return preg->re_nsub; +} + +bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){ +  if (error) +    return false; + +  unsigned nmatch = Matches ? preg->re_nsub+1 : 0; + +  // pmatch needs to have at least one element. +  SmallVector<llvm_regmatch_t, 8> pm; +  pm.resize(nmatch > 0 ? nmatch : 1); +  pm[0].rm_so = 0; +  pm[0].rm_eo = String.size(); + +  int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + +  if (rc == REG_NOMATCH) +    return false; +  if (rc != 0) { +    // regexec can fail due to invalid pattern or running out of memory. +    error = rc; +    return false; +  } + +  // There was a match. + +  if (Matches) { // match position requested +    Matches->clear(); +     +    for (unsigned i = 0; i != nmatch; ++i) { +      if (pm[i].rm_so == -1) { +        // this group didn't match +        Matches->push_back(StringRef()); +        continue; +      } +      assert(pm[i].rm_eo >= pm[i].rm_so); +      Matches->push_back(StringRef(String.data()+pm[i].rm_so, +                                   pm[i].rm_eo-pm[i].rm_so)); +    } +  } + +  return true; +} + +std::string Regex::sub(StringRef Repl, StringRef String, +                       std::string *Error) { +  SmallVector<StringRef, 8> Matches; + +  // Reset error, if given. +  if (Error && !Error->empty()) *Error = ""; + +  // Return the input if there was no match. +  if (!match(String, &Matches)) +    return String; + +  // Otherwise splice in the replacement string, starting with the prefix before +  // the match. +  std::string Res(String.begin(), Matches[0].begin()); + +  // Then the replacement string, honoring possible substitutions. +  while (!Repl.empty()) { +    // Skip to the next escape. +    std::pair<StringRef, StringRef> Split = Repl.split('\\'); + +    // Add the skipped substring. +    Res += Split.first; + +    // Check for terminimation and trailing backslash. +    if (Split.second.empty()) { +      if (Repl.size() != Split.first.size() && +          Error && Error->empty()) +        *Error = "replacement string contained trailing backslash"; +      break; +    } + +    // Otherwise update the replacement string and interpret escapes. +    Repl = Split.second; + +    // FIXME: We should have a StringExtras function for mapping C99 escapes. +    switch (Repl[0]) { +      // Treat all unrecognized characters as self-quoting. +    default: +      Res += Repl[0]; +      Repl = Repl.substr(1); +      break; + +      // Single character escapes. +    case 't': +      Res += '\t'; +      Repl = Repl.substr(1); +      break; +    case 'n': +      Res += '\n'; +      Repl = Repl.substr(1); +      break; + +      // Decimal escapes are backreferences. +    case '0': case '1': case '2': case '3': case '4': +    case '5': case '6': case '7': case '8': case '9': { +      // Extract the backreference number. +      StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); +      Repl = Repl.substr(Ref.size()); + +      unsigned RefValue; +      if (!Ref.getAsInteger(10, RefValue) && +          RefValue < Matches.size()) +        Res += Matches[RefValue]; +      else if (Error && Error->empty()) +        *Error = ("invalid backreference string '" + Twine(Ref) + "'").str(); +      break; +    } +    } +  } + +  // And finally the suffix. +  Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + +  return Res; +} + +// These are the special characters matched in functions like "p_ere_exp". +static const char RegexMetachars[] = "()^$|*+?.[]\\{}"; + +bool Regex::isLiteralERE(StringRef Str) { +  // Check for regex metacharacters.  This list was derived from our regex +  // implementation in regcomp.c and double checked against the POSIX extended +  // regular expression specification. +  return Str.find_first_of(RegexMetachars) == StringRef::npos; +} + +std::string Regex::escape(StringRef String) { +  std::string RegexStr; +  for (unsigned i = 0, e = String.size(); i != e; ++i) { +    if (strchr(RegexMetachars, String[i])) +      RegexStr += '\\'; +    RegexStr += String[i]; +  } + +  return RegexStr; +} | 
