aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp250
1 files changed, 250 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
new file mode 100644
index 000000000000..7004adf461a0
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
@@ -0,0 +1,250 @@
+//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a glob pattern matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GlobPattern.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+
+// Expands character ranges and returns a bitmap.
+// For example, "a-cf-hz" is expanded to "abcfghz".
+static Expected<BitVector> expand(StringRef S, StringRef Original) {
+ BitVector BV(256, false);
+
+ // Expand X-Y.
+ for (;;) {
+ if (S.size() < 3)
+ break;
+
+ uint8_t Start = S[0];
+ uint8_t End = S[2];
+
+ // If it doesn't start with something like X-Y,
+ // consume the first character and proceed.
+ if (S[1] != '-') {
+ BV[Start] = true;
+ S = S.substr(1);
+ continue;
+ }
+
+ // It must be in the form of X-Y.
+ // Validate it and then interpret the range.
+ if (Start > End)
+ return make_error<StringError>("invalid glob pattern: " + Original,
+ errc::invalid_argument);
+
+ for (int C = Start; C <= End; ++C)
+ BV[(uint8_t)C] = true;
+ S = S.substr(3);
+ }
+
+ for (char C : S)
+ BV[(uint8_t)C] = true;
+ return BV;
+}
+
+// Identify brace expansions in S and return the list of patterns they expand
+// into.
+static Expected<SmallVector<std::string, 1>>
+parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
+ SmallVector<std::string> SubPatterns = {S.str()};
+ if (!MaxSubPatterns || !S.contains('{'))
+ return std::move(SubPatterns);
+
+ struct BraceExpansion {
+ size_t Start;
+ size_t Length;
+ SmallVector<StringRef, 2> Terms;
+ };
+ SmallVector<BraceExpansion, 0> BraceExpansions;
+
+ BraceExpansion *CurrentBE = nullptr;
+ size_t TermBegin;
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ I = S.find(']', I + 2);
+ if (I == std::string::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ } else if (S[I] == '{') {
+ if (CurrentBE)
+ return make_error<StringError>(
+ "nested brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE = &BraceExpansions.emplace_back();
+ CurrentBE->Start = I;
+ TermBegin = I + 1;
+ } else if (S[I] == ',') {
+ if (!CurrentBE)
+ continue;
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ TermBegin = I + 1;
+ } else if (S[I] == '}') {
+ if (!CurrentBE)
+ continue;
+ if (CurrentBE->Terms.empty())
+ return make_error<StringError>(
+ "empty or singleton brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ CurrentBE->Length = I - CurrentBE->Start + 1;
+ CurrentBE = nullptr;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
+ }
+ }
+ if (CurrentBE)
+ return make_error<StringError>("incomplete brace expansion",
+ errc::invalid_argument);
+
+ size_t NumSubPatterns = 1;
+ for (auto &BE : BraceExpansions) {
+ if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
+ NumSubPatterns = std::numeric_limits<size_t>::max();
+ break;
+ }
+ NumSubPatterns *= BE.Terms.size();
+ }
+ if (NumSubPatterns > *MaxSubPatterns)
+ return make_error<StringError>("too many brace expansions",
+ errc::invalid_argument);
+ // Replace brace expansions in reverse order so that we don't invalidate
+ // earlier start indices
+ for (auto &BE : reverse(BraceExpansions)) {
+ SmallVector<std::string> OrigSubPatterns;
+ std::swap(SubPatterns, OrigSubPatterns);
+ for (StringRef Term : BE.Terms)
+ for (StringRef Orig : OrigSubPatterns)
+ SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
+ }
+ return std::move(SubPatterns);
+}
+
+Expected<GlobPattern>
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+ GlobPattern Pat;
+
+ // Store the prefix that does not contain any metacharacter.
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ Pat.Prefix = S.substr(0, PrefixSize);
+ if (PrefixSize == std::string::npos)
+ return Pat;
+ S = S.substr(PrefixSize);
+
+ SmallVector<std::string, 1> SubPats;
+ if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
+ return std::move(Err);
+ for (StringRef SubPat : SubPats) {
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ if (!SubGlobOrErr)
+ return SubGlobOrErr.takeError();
+ Pat.SubGlobs.push_back(*SubGlobOrErr);
+ }
+
+ return Pat;
+}
+
+Expected<GlobPattern::SubGlobPattern>
+GlobPattern::SubGlobPattern::create(StringRef S) {
+ SubGlobPattern Pat;
+
+ // Parse brackets.
+ Pat.Pat.assign(S.begin(), S.end());
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ // ']' is allowed as the first character of a character class. '[]' is
+ // invalid. So, just skip the first character.
+ ++I;
+ size_t J = S.find(']', I + 1);
+ if (J == StringRef::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ StringRef Chars = S.substr(I, J - I);
+ bool Invert = S[I] == '^' || S[I] == '!';
+ Expected<BitVector> BV =
+ Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
+ if (!BV)
+ return BV.takeError();
+ if (Invert)
+ BV->flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ I = J;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
+ }
+ }
+ return Pat;
+}
+
+bool GlobPattern::match(StringRef S) const {
+ if (!S.consume_front(Prefix))
+ return false;
+ if (SubGlobs.empty() && S.empty())
+ return true;
+ for (auto &Glob : SubGlobs)
+ if (Glob.match(S))
+ return true;
+ return false;
+}
+
+// Factor the pattern into segments split by '*'. The segment is matched
+// sequentianlly by finding the first occurrence past the end of the previous
+// match.
+bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+ const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
+ *SavedS = S;
+ const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
+ size_t B = 0, SavedB = 0;
+ while (S != End) {
+ if (P == PEnd)
+ ;
+ else if (*P == '*') {
+ // The non-* substring on the left of '*' matches the tail of S. Save the
+ // positions to be used by backtracking if we see a mismatch later.
+ SegmentBegin = ++P;
+ SavedS = S;
+ SavedB = B;
+ continue;
+ } else if (*P == '[') {
+ if (Brackets[B].Bytes[uint8_t(*S)]) {
+ P = Pat.data() + Brackets[B++].NextOffset;
+ ++S;
+ continue;
+ }
+ } else if (*P == '\\') {
+ if (*++P == *S) {
+ ++P;
+ ++S;
+ continue;
+ }
+ } else if (*P == *S || *P == '?') {
+ ++P;
+ ++S;
+ continue;
+ }
+ if (!SegmentBegin)
+ return false;
+ // We have seen a '*'. Backtrack to the saved positions. Shift the S
+ // position to probe the next starting position in the segment.
+ P = SegmentBegin;
+ S = ++SavedS;
+ B = SavedB;
+ }
+ // All bytes in Str have been matched. Return true if the rest part of Pat is
+ // empty or contains only '*'.
+ return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos;
+}