diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp new file mode 100644 index 000000000000..7004adf461a0 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Support/GlobPattern.cpp @@ -0,0 +1,250 @@ +//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a glob pattern matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GlobPattern.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" + +using namespace llvm; + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". +static Expected<BitVector> expand(StringRef S, StringRef Original) { + BitVector BV(256, false); + + // Expand X-Y. + for (;;) { + if (S.size() < 3) + break; + + uint8_t Start = S[0]; + uint8_t End = S[2]; + + // If it doesn't start with something like X-Y, + // consume the first character and proceed. + if (S[1] != '-') { + BV[Start] = true; + S = S.substr(1); + continue; + } + + // It must be in the form of X-Y. + // Validate it and then interpret the range. + if (Start > End) + return make_error<StringError>("invalid glob pattern: " + Original, + errc::invalid_argument); + + for (int C = Start; C <= End; ++C) + BV[(uint8_t)C] = true; + S = S.substr(3); + } + + for (char C : S) + BV[(uint8_t)C] = true; + return BV; +} + +// Identify brace expansions in S and return the list of patterns they expand +// into. +static Expected<SmallVector<std::string, 1>> +parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) { + SmallVector<std::string> SubPatterns = {S.str()}; + if (!MaxSubPatterns || !S.contains('{')) + return std::move(SubPatterns); + + struct BraceExpansion { + size_t Start; + size_t Length; + SmallVector<StringRef, 2> Terms; + }; + SmallVector<BraceExpansion, 0> BraceExpansions; + + BraceExpansion *CurrentBE = nullptr; + size_t TermBegin; + for (size_t I = 0, E = S.size(); I != E; ++I) { + if (S[I] == '[') { + I = S.find(']', I + 2); + if (I == std::string::npos) + return make_error<StringError>("invalid glob pattern, unmatched '['", + errc::invalid_argument); + } else if (S[I] == '{') { + if (CurrentBE) + return make_error<StringError>( + "nested brace expansions are not supported", + errc::invalid_argument); + CurrentBE = &BraceExpansions.emplace_back(); + CurrentBE->Start = I; + TermBegin = I + 1; + } else if (S[I] == ',') { + if (!CurrentBE) + continue; + CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin)); + TermBegin = I + 1; + } else if (S[I] == '}') { + if (!CurrentBE) + continue; + if (CurrentBE->Terms.empty()) + return make_error<StringError>( + "empty or singleton brace expansions are not supported", + errc::invalid_argument); + CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin)); + CurrentBE->Length = I - CurrentBE->Start + 1; + CurrentBE = nullptr; + } else if (S[I] == '\\') { + if (++I == E) + return make_error<StringError>("invalid glob pattern, stray '\\'", + errc::invalid_argument); + } + } + if (CurrentBE) + return make_error<StringError>("incomplete brace expansion", + errc::invalid_argument); + + size_t NumSubPatterns = 1; + for (auto &BE : BraceExpansions) { + if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) { + NumSubPatterns = std::numeric_limits<size_t>::max(); + break; + } + NumSubPatterns *= BE.Terms.size(); + } + if (NumSubPatterns > *MaxSubPatterns) + return make_error<StringError>("too many brace expansions", + errc::invalid_argument); + // Replace brace expansions in reverse order so that we don't invalidate + // earlier start indices + for (auto &BE : reverse(BraceExpansions)) { + SmallVector<std::string> OrigSubPatterns; + std::swap(SubPatterns, OrigSubPatterns); + for (StringRef Term : BE.Terms) + for (StringRef Orig : OrigSubPatterns) + SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term); + } + return std::move(SubPatterns); +} + +Expected<GlobPattern> +GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { + GlobPattern Pat; + + // Store the prefix that does not contain any metacharacter. + size_t PrefixSize = S.find_first_of("?*[{\\"); + Pat.Prefix = S.substr(0, PrefixSize); + if (PrefixSize == std::string::npos) + return Pat; + S = S.substr(PrefixSize); + + SmallVector<std::string, 1> SubPats; + if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats)) + return std::move(Err); + for (StringRef SubPat : SubPats) { + auto SubGlobOrErr = SubGlobPattern::create(SubPat); + if (!SubGlobOrErr) + return SubGlobOrErr.takeError(); + Pat.SubGlobs.push_back(*SubGlobOrErr); + } + + return Pat; +} + +Expected<GlobPattern::SubGlobPattern> +GlobPattern::SubGlobPattern::create(StringRef S) { + SubGlobPattern Pat; + + // Parse brackets. + Pat.Pat.assign(S.begin(), S.end()); + for (size_t I = 0, E = S.size(); I != E; ++I) { + if (S[I] == '[') { + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. + ++I; + size_t J = S.find(']', I + 1); + if (J == StringRef::npos) + return make_error<StringError>("invalid glob pattern, unmatched '['", + errc::invalid_argument); + StringRef Chars = S.substr(I, J - I); + bool Invert = S[I] == '^' || S[I] == '!'; + Expected<BitVector> BV = + Invert ? expand(Chars.substr(1), S) : expand(Chars, S); + if (!BV) + return BV.takeError(); + if (Invert) + BV->flip(); + Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)}); + I = J; + } else if (S[I] == '\\') { + if (++I == E) + return make_error<StringError>("invalid glob pattern, stray '\\'", + errc::invalid_argument); + } + } + return Pat; +} + +bool GlobPattern::match(StringRef S) const { + if (!S.consume_front(Prefix)) + return false; + if (SubGlobs.empty() && S.empty()) + return true; + for (auto &Glob : SubGlobs) + if (Glob.match(S)) + return true; + return false; +} + +// Factor the pattern into segments split by '*'. The segment is matched +// sequentianlly by finding the first occurrence past the end of the previous +// match. +bool GlobPattern::SubGlobPattern::match(StringRef Str) const { + const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), + *SavedS = S; + const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); + size_t B = 0, SavedB = 0; + while (S != End) { + if (P == PEnd) + ; + else if (*P == '*') { + // The non-* substring on the left of '*' matches the tail of S. Save the + // positions to be used by backtracking if we see a mismatch later. + SegmentBegin = ++P; + SavedS = S; + SavedB = B; + continue; + } else if (*P == '[') { + if (Brackets[B].Bytes[uint8_t(*S)]) { + P = Pat.data() + Brackets[B++].NextOffset; + ++S; + continue; + } + } else if (*P == '\\') { + if (*++P == *S) { + ++P; + ++S; + continue; + } + } else if (*P == *S || *P == '?') { + ++P; + ++S; + continue; + } + if (!SegmentBegin) + return false; + // We have seen a '*'. Backtrack to the saved positions. Shift the S + // position to probe the next starting position in the segment. + P = SegmentBegin; + S = ++SavedS; + B = SavedB; + } + // All bytes in Str have been matched. Return true if the rest part of Pat is + // empty or contains only '*'. + return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos; +} |
