aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-12-18 20:30:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-04-06 20:11:55 +0000
commit5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
parent3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp273
1 files changed, 173 insertions, 100 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
index b8c6ea80b44c..7004adf461a0 100644
--- a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp
@@ -11,16 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/GlobPattern.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Errc.h"
using namespace llvm;
-static bool hasWildcard(StringRef S) {
- return S.find_first_of("?*[\\") != StringRef::npos;
-}
-
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
static Expected<BitVector> expand(StringRef S, StringRef Original) {
@@ -58,120 +53,198 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
return BV;
}
-// This is a scanner for the glob pattern.
-// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
-// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
-// equivalent to "[^<chars>]"), or a non-meta character.
-// This function returns the first token in S.
-static Expected<BitVector> scan(StringRef &S, StringRef Original) {
- switch (S[0]) {
- case '*':
- S = S.substr(1);
- // '*' is represented by an empty bitvector.
- // All other bitvectors are 256-bit long.
- return BitVector();
- case '?':
- S = S.substr(1);
- return BitVector(256, true);
- case '[': {
- // ']' is allowed as the first character of a character class. '[]' is
- // invalid. So, just skip the first character.
- size_t End = S.find(']', 2);
- if (End == StringRef::npos)
- return make_error<StringError>("invalid glob pattern: " + Original,
- errc::invalid_argument);
-
- StringRef Chars = S.substr(1, End - 1);
- S = S.substr(End + 1);
- if (Chars.startswith("^") || Chars.startswith("!")) {
- Expected<BitVector> BV = expand(Chars.substr(1), Original);
- if (!BV)
- return BV.takeError();
- return BV->flip();
+// Identify brace expansions in S and return the list of patterns they expand
+// into.
+static Expected<SmallVector<std::string, 1>>
+parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
+ SmallVector<std::string> SubPatterns = {S.str()};
+ if (!MaxSubPatterns || !S.contains('{'))
+ return std::move(SubPatterns);
+
+ struct BraceExpansion {
+ size_t Start;
+ size_t Length;
+ SmallVector<StringRef, 2> Terms;
+ };
+ SmallVector<BraceExpansion, 0> BraceExpansions;
+
+ BraceExpansion *CurrentBE = nullptr;
+ size_t TermBegin;
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ I = S.find(']', I + 2);
+ if (I == std::string::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ } else if (S[I] == '{') {
+ if (CurrentBE)
+ return make_error<StringError>(
+ "nested brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE = &BraceExpansions.emplace_back();
+ CurrentBE->Start = I;
+ TermBegin = I + 1;
+ } else if (S[I] == ',') {
+ if (!CurrentBE)
+ continue;
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ TermBegin = I + 1;
+ } else if (S[I] == '}') {
+ if (!CurrentBE)
+ continue;
+ if (CurrentBE->Terms.empty())
+ return make_error<StringError>(
+ "empty or singleton brace expansions are not supported",
+ errc::invalid_argument);
+ CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
+ CurrentBE->Length = I - CurrentBE->Start + 1;
+ CurrentBE = nullptr;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
}
- return expand(Chars, Original);
}
- case '\\':
- // Eat this character and fall through below to treat it like a non-meta
- // character.
- S = S.substr(1);
- [[fallthrough]];
- default:
- BitVector BV(256, false);
- BV[(uint8_t)S[0]] = true;
- S = S.substr(1);
- return BV;
+ if (CurrentBE)
+ return make_error<StringError>("incomplete brace expansion",
+ errc::invalid_argument);
+
+ size_t NumSubPatterns = 1;
+ for (auto &BE : BraceExpansions) {
+ if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
+ NumSubPatterns = std::numeric_limits<size_t>::max();
+ break;
+ }
+ NumSubPatterns *= BE.Terms.size();
+ }
+ if (NumSubPatterns > *MaxSubPatterns)
+ return make_error<StringError>("too many brace expansions",
+ errc::invalid_argument);
+ // Replace brace expansions in reverse order so that we don't invalidate
+ // earlier start indices
+ for (auto &BE : reverse(BraceExpansions)) {
+ SmallVector<std::string> OrigSubPatterns;
+ std::swap(SubPatterns, OrigSubPatterns);
+ for (StringRef Term : BE.Terms)
+ for (StringRef Orig : OrigSubPatterns)
+ SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
}
+ return std::move(SubPatterns);
}
-Expected<GlobPattern> GlobPattern::create(StringRef S) {
+Expected<GlobPattern>
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
- // S doesn't contain any metacharacter,
- // so the regular string comparison should work.
- if (!hasWildcard(S)) {
- Pat.Exact = S;
- return Pat;
- }
-
- // S is something like "foo*", and the "* is not escaped. We can use
- // startswith().
- if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
- Pat.Prefix = S.drop_back();
+ // Store the prefix that does not contain any metacharacter.
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ Pat.Prefix = S.substr(0, PrefixSize);
+ if (PrefixSize == std::string::npos)
return Pat;
+ S = S.substr(PrefixSize);
+
+ SmallVector<std::string, 1> SubPats;
+ if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
+ return std::move(Err);
+ for (StringRef SubPat : SubPats) {
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ if (!SubGlobOrErr)
+ return SubGlobOrErr.takeError();
+ Pat.SubGlobs.push_back(*SubGlobOrErr);
}
- // S is something like "*foo". We can use endswith().
- if (S.startswith("*") && !hasWildcard(S.drop_front())) {
- Pat.Suffix = S.drop_front();
- return Pat;
- }
+ return Pat;
+}
- // Otherwise, we need to do real glob pattern matching.
- // Parse the pattern now.
- StringRef Original = S;
- while (!S.empty()) {
- Expected<BitVector> BV = scan(S, Original);
- if (!BV)
- return BV.takeError();
- Pat.Tokens.push_back(*BV);
+Expected<GlobPattern::SubGlobPattern>
+GlobPattern::SubGlobPattern::create(StringRef S) {
+ SubGlobPattern Pat;
+
+ // Parse brackets.
+ Pat.Pat.assign(S.begin(), S.end());
+ for (size_t I = 0, E = S.size(); I != E; ++I) {
+ if (S[I] == '[') {
+ // ']' is allowed as the first character of a character class. '[]' is
+ // invalid. So, just skip the first character.
+ ++I;
+ size_t J = S.find(']', I + 1);
+ if (J == StringRef::npos)
+ return make_error<StringError>("invalid glob pattern, unmatched '['",
+ errc::invalid_argument);
+ StringRef Chars = S.substr(I, J - I);
+ bool Invert = S[I] == '^' || S[I] == '!';
+ Expected<BitVector> BV =
+ Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
+ if (!BV)
+ return BV.takeError();
+ if (Invert)
+ BV->flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ I = J;
+ } else if (S[I] == '\\') {
+ if (++I == E)
+ return make_error<StringError>("invalid glob pattern, stray '\\'",
+ errc::invalid_argument);
+ }
}
return Pat;
}
bool GlobPattern::match(StringRef S) const {
- if (Exact)
- return S == *Exact;
- if (Prefix)
- return S.startswith(*Prefix);
- if (Suffix)
- return S.endswith(*Suffix);
- return matchOne(Tokens, S);
+ if (!S.consume_front(Prefix))
+ return false;
+ if (SubGlobs.empty() && S.empty())
+ return true;
+ for (auto &Glob : SubGlobs)
+ if (Glob.match(S))
+ return true;
+ return false;
}
-// Runs glob pattern Pats against string S.
-bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
- for (;;) {
- if (Pats.empty())
- return S.empty();
-
- // If Pats[0] is '*', try to match Pats[1..] against all possible
- // tail strings of S to see at least one pattern succeeds.
- if (Pats[0].size() == 0) {
- Pats = Pats.slice(1);
- if (Pats.empty())
- // Fast path. If a pattern is '*', it matches anything.
- return true;
- for (size_t I = 0, E = S.size(); I < E; ++I)
- if (matchOne(Pats, S.substr(I)))
- return true;
- return false;
+// Factor the pattern into segments split by '*'. The segment is matched
+// sequentianlly by finding the first occurrence past the end of the previous
+// match.
+bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+ const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
+ *SavedS = S;
+ const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
+ size_t B = 0, SavedB = 0;
+ while (S != End) {
+ if (P == PEnd)
+ ;
+ else if (*P == '*') {
+ // The non-* substring on the left of '*' matches the tail of S. Save the
+ // positions to be used by backtracking if we see a mismatch later.
+ SegmentBegin = ++P;
+ SavedS = S;
+ SavedB = B;
+ continue;
+ } else if (*P == '[') {
+ if (Brackets[B].Bytes[uint8_t(*S)]) {
+ P = Pat.data() + Brackets[B++].NextOffset;
+ ++S;
+ continue;
+ }
+ } else if (*P == '\\') {
+ if (*++P == *S) {
+ ++P;
+ ++S;
+ continue;
+ }
+ } else if (*P == *S || *P == '?') {
+ ++P;
+ ++S;
+ continue;
}
-
- // If Pats[0] is not '*', it must consume one character.
- if (S.empty() || !Pats[0][(uint8_t)S[0]])
+ if (!SegmentBegin)
return false;
- Pats = Pats.slice(1);
- S = S.substr(1);
+ // We have seen a '*'. Backtrack to the saved positions. Shift the S
+ // position to probe the next starting position in the segment.
+ P = SegmentBegin;
+ S = ++SavedS;
+ B = SavedB;
}
+ // All bytes in Str have been matched. Return true if the rest part of Pat is
+ // empty or contains only '*'.
+ return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos;
}