diff options
Diffstat (limited to 'contrib/llvm/lib/Support/Regex.cpp')
| -rw-r--r-- | contrib/llvm/lib/Support/Regex.cpp | 168 | 
1 files changed, 168 insertions, 0 deletions
| diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp new file mode 100644 index 000000000000..d293da07d684 --- /dev/null +++ b/contrib/llvm/lib/Support/Regex.cpp @@ -0,0 +1,168 @@ +//===-- Regex.cpp - Regular Expression matcher implementation -------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Regex.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "regex_impl.h" +#include <string> +using namespace llvm; + +Regex::Regex(StringRef regex, unsigned Flags) { +  unsigned flags = 0; +  preg = new llvm_regex(); +  preg->re_endp = regex.end(); +  if (Flags & IgnoreCase)  +    flags |= REG_ICASE; +  if (Flags & Newline) +    flags |= REG_NEWLINE; +  error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND); +} + +Regex::~Regex() { +  llvm_regfree(preg); +  delete preg; +} + +bool Regex::isValid(std::string &Error) { +  if (!error) +    return true; +   +  size_t len = llvm_regerror(error, preg, NULL, 0); +   +  Error.resize(len); +  llvm_regerror(error, preg, &Error[0], len); +  return false; +} + +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { +  return preg->re_nsub; +} + +bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){ +  unsigned nmatch = Matches ? preg->re_nsub+1 : 0; + +  // pmatch needs to have at least one element. +  SmallVector<llvm_regmatch_t, 8> pm; +  pm.resize(nmatch > 0 ? nmatch : 1); +  pm[0].rm_so = 0; +  pm[0].rm_eo = String.size(); + +  int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + +  if (rc == REG_NOMATCH) +    return false; +  if (rc != 0) { +    // regexec can fail due to invalid pattern or running out of memory. +    error = rc; +    return false; +  } + +  // There was a match. + +  if (Matches) { // match position requested +    Matches->clear(); +     +    for (unsigned i = 0; i != nmatch; ++i) { +      if (pm[i].rm_so == -1) { +        // this group didn't match +        Matches->push_back(StringRef()); +        continue; +      } +      assert(pm[i].rm_eo >= pm[i].rm_so); +      Matches->push_back(StringRef(String.data()+pm[i].rm_so, +                                   pm[i].rm_eo-pm[i].rm_so)); +    } +  } + +  return true; +} + +std::string Regex::sub(StringRef Repl, StringRef String, +                       std::string *Error) { +  SmallVector<StringRef, 8> Matches; + +  // Reset error, if given. +  if (Error && !Error->empty()) *Error = ""; + +  // Return the input if there was no match. +  if (!match(String, &Matches)) +    return String; + +  // Otherwise splice in the replacement string, starting with the prefix before +  // the match. +  std::string Res(String.begin(), Matches[0].begin()); + +  // Then the replacement string, honoring possible substitutions. +  while (!Repl.empty()) { +    // Skip to the next escape. +    std::pair<StringRef, StringRef> Split = Repl.split('\\'); + +    // Add the skipped substring. +    Res += Split.first; + +    // Check for terminimation and trailing backslash. +    if (Split.second.empty()) { +      if (Repl.size() != Split.first.size() && +          Error && Error->empty()) +        *Error = "replacement string contained trailing backslash"; +      break; +    } + +    // Otherwise update the replacement string and interpret escapes. +    Repl = Split.second; + +    // FIXME: We should have a StringExtras function for mapping C99 escapes. +    switch (Repl[0]) { +      // Treat all unrecognized characters as self-quoting. +    default: +      Res += Repl[0]; +      Repl = Repl.substr(1); +      break; + +      // Single character escapes. +    case 't': +      Res += '\t'; +      Repl = Repl.substr(1); +      break; +    case 'n': +      Res += '\n'; +      Repl = Repl.substr(1); +      break; + +      // Decimal escapes are backreferences. +    case '0': case '1': case '2': case '3': case '4': +    case '5': case '6': case '7': case '8': case '9': { +      // Extract the backreference number. +      StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); +      Repl = Repl.substr(Ref.size()); + +      unsigned RefValue; +      if (!Ref.getAsInteger(10, RefValue) && +          RefValue < Matches.size()) +        Res += Matches[RefValue]; +      else if (Error && Error->empty()) +        *Error = "invalid backreference string '" + Ref.str() + "'"; +      break; +    } +    } +  } + +  // And finally the suffix. +  Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + +  return Res; +} | 
