diff options
Diffstat (limited to 'clang/lib/Tooling/Syntax/Tokens.cpp')
-rw-r--r-- | clang/lib/Tooling/Syntax/Tokens.cpp | 259 |
1 files changed, 175 insertions, 84 deletions
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index e2014f965c90..b13dc9ef4aee 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -18,8 +18,6 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -28,6 +26,7 @@ #include <algorithm> #include <cassert> #include <iterator> +#include <optional> #include <string> #include <utility> #include <vector> @@ -55,45 +54,140 @@ getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, return {Begin, End}; } -// Finds the smallest expansion range that contains expanded tokens First and -// Last, e.g.: +// Finds the range within FID corresponding to expanded tokens [First, Last]. +// Prev precedes First and Next follows Last, these must *not* be included. +// If no range satisfies the criteria, returns an invalid range. +// // #define ID(x) x // ID(ID(ID(a1) a2)) // ~~ -> a1 // ~~ -> a2 // ~~~~~~~~~ -> a1 a2 -SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, - const syntax::Token &Last, - const SourceManager &SM) { - SourceRange Res; - auto FirstLoc = First.location(), LastLoc = Last.location(); - // Keep traversing up the spelling chain as longs as tokens are part of the - // same expansion. - while (!FirstLoc.isFileID() && !LastLoc.isFileID()) { - auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); - auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); - // Stop if expansions have diverged. 
- if (ExpInfoFirst.getExpansionLocStart() != - ExpInfoLast.getExpansionLocStart()) +SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last, + SourceLocation Prev, SourceLocation Next, + FileID TargetFile, + const SourceManager &SM) { + // There are two main parts to this algorithm: + // - identifying which spelled range covers the expanded tokens + // - validating that this range doesn't cover any extra tokens (First/Last) + // + // We do these in order. However as we transform the expanded range into the + // spelled one, we adjust First/Last so the validation remains simple. + + assert(SM.getSLocEntry(TargetFile).isFile()); + // In most cases, to select First and Last we must return their expansion + // range, i.e. the whole of any macros they are included in. + // + // When First and Last are part of the *same macro arg* of a macro written + // in TargetFile, we want that slice of the arg, i.e. their spelling range. + // + // Unwrap such macro calls. If the target file has A(B(C)), the + // SourceLocation stack of a token inside C shows us the expansion of A first, + // then B, then any macros inside C's body, then C itself. + // (This is the reverse of the order the PP applies the expansions in). + while (First.isMacroID() && Last.isMacroID()) { + auto DecFirst = SM.getDecomposedLoc(First); + auto DecLast = SM.getDecomposedLoc(Last); + auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion(); + auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion(); + + if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion()) + break; + // Locations are in the same macro arg if they expand to the same place. + // (They may still have different FileIDs - an arg can have >1 chunks!) + if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart()) break; - // Do not continue into macro bodies. 
- if (!ExpInfoFirst.isMacroArgExpansion() || - !ExpInfoLast.isMacroArgExpansion()) + // Careful, given: + // #define HIDE ID(ID(a)) + // ID(ID(HIDE)) + // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2. + // We distinguish them by whether the macro expands into the target file. + // Fortunately, the target file ones will always appear first. + auto &ExpMacro = + SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart())) + .getExpansion(); + if (ExpMacro.getExpansionLocStart().isMacroID()) break; - FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); - LastLoc = SM.getImmediateSpellingLoc(LastLoc); - // Update the result afterwards, as we want the tokens that triggered the - // expansion. - Res = {FirstLoc, LastLoc}; + // Replace each endpoint with its spelling inside the macro arg. + // (This is getImmediateSpellingLoc without repeating lookups). + First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second); + Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second); + + // Now: how do we adjust the previous/next bounds? Three cases: + // A) If they are also part of the same macro arg, we translate them too. + // This will ensure that we don't select any macros nested within the + // macro arg that cover extra tokens. Critical case: + // #define ID(X) X + // ID(prev target) // selecting 'target' succeeds + // #define LARGE ID(prev target) + // LARGE // selecting 'target' fails. + // B) They are not in the macro at all, then their expansion range is a + // sibling to it, and we can safely substitute that. + // #define PREV prev + // #define ID(X) X + // PREV ID(target) // selecting 'target' succeeds. + // #define LARGE PREV ID(target) + // LARGE // selecting 'target' fails. + // C) They are in a different arg of this macro, or the macro body. + // Now selecting the whole macro arg is fine, but the whole macro is not. + // Model this by using the edge of the macro call as the bound. 
+ // #define ID2(X, Y) X Y + // ID2(prev, target) // selecting 'target' succeeds + // #define LARGE ID2(prev, target) + // LARGE // selecting 'target' fails + auto AdjustBound = [&](SourceLocation &Bound) { + if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B. + return; + auto DecBound = SM.getDecomposedLoc(Bound); + auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion(); + if (ExpBound.isMacroArgExpansion() && + ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) { + // Case A: translate to (spelling) loc within the macro arg. + Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second); + return; + } + while (Bound.isMacroID()) { + SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange(); + if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) { + // Case C: the bound becomes the edge of the macro call itself. + Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd(); + return; + } + // Either case B, or a later iteration will find case C. + // We choose the upper bound for Prev and the lower one for Next: + // ID(prev) target ID(next) + // ^ ^ + // new-prev new-next + Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin(); + } + }; + AdjustBound(Prev); + AdjustBound(Next); } - // Normally mapping back to expansion location here only changes FileID, as - // we've already found some tokens expanded from the same macro argument, and - // they should map to a consecutive subset of spelled tokens. Unfortunately - // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations - // based on their FileID in addition to offsets. So even though we are - // referring to same tokens, SourceManager might tell us that one is before - // the other if they've got different FileIDs. - return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); + + // In all remaining cases we need the full containing macros. + // If this overlaps Prev or Next, then no range is possible. 
+ SourceRange Candidate = + SM.getExpansionRange(SourceRange(First, Last)).getAsRange(); + auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin()); + auto DecLast = SM.getDecomposedLoc(Candidate.getEnd()); + // Can end up in the wrong file due to bad input or token-pasting shenanigans. + if (Candidate.isInvalid() || DecFirst.first != TargetFile || DecLast.first != TargetFile) + return SourceRange(); + // Check bounds, which may still be inside macros. + if (Prev.isValid()) { + auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin()); + if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second) + return SourceRange(); + } + if (Next.isValid()) { + auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd()); + if (Dec.first != DecLast.first || Dec.second <= DecLast.second) + return SourceRange(); + } + // Now we know that Candidate is a file range that covers [First, Last] + // without encroaching on {Prev, Next}. Ship it! + return Candidate; } } // namespace @@ -331,8 +425,8 @@ TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { // Avoid returning empty ranges. if (ExpandedBegin == ExpandedEnd) return {}; - return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, - ExpandedTokens.data() + ExpandedEnd)}; + return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { @@ -357,57 +451,54 @@ std::string TokenBuffer::Mapping::str() const { BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); } -llvm::Optional<llvm::ArrayRef<syntax::Token>> +std::optional<llvm::ArrayRef<syntax::Token>> TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { // Mapping an empty range is ambiguous in case of empty mappings at either end // of the range, bail out in that case. 
if (Expanded.empty()) - return llvm::None; - - const syntax::Token *BeginSpelled; - const Mapping *BeginMapping; - std::tie(BeginSpelled, BeginMapping) = - spelledForExpandedToken(&Expanded.front()); - - const syntax::Token *LastSpelled; - const Mapping *LastMapping; - std::tie(LastSpelled, LastMapping) = - spelledForExpandedToken(&Expanded.back()); + return std::nullopt; + const syntax::Token *First = &Expanded.front(); + const syntax::Token *Last = &Expanded.back(); + auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First); + auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last); - FileID FID = SourceMgr->getFileID(BeginSpelled->location()); + FileID FID = SourceMgr->getFileID(FirstSpelled->location()); // FIXME: Handle multi-file changes by trying to map onto a common root. if (FID != SourceMgr->getFileID(LastSpelled->location())) - return llvm::None; + return std::nullopt; const MarkedFile &File = Files.find(FID)->second; - // If both tokens are coming from a macro argument expansion, try and map to - // smallest part of the macro argument. BeginMapping && LastMapping check is - // only for performance, they are a prerequisite for Expanded.front() and - // Expanded.back() being part of a macro arg expansion. - if (BeginMapping && LastMapping && - SourceMgr->isMacroArgExpansion(Expanded.front().location()) && - SourceMgr->isMacroArgExpansion(Expanded.back().location())) { - auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), - Expanded.back(), *SourceMgr); - // It might be the case that tokens are arguments of different macro calls, - // in that case we should continue with the logic below instead of returning - // an empty range. - if (CommonRange.isValid()) - return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); + // If the range is within one macro argument, the result may be only part of a + // Mapping. We must use the general (SourceManager-based) algorithm. 
+ if (FirstMapping && FirstMapping == LastMapping && + SourceMgr->isMacroArgExpansion(First->location()) && + SourceMgr->isMacroArgExpansion(Last->location())) { + // We use excluded Prev/Next token for bounds checking. + SourceLocation Prev = (First == &ExpandedTokens.front()) + ? SourceLocation() + : (First - 1)->location(); + SourceLocation Next = (Last == &ExpandedTokens.back()) + ? SourceLocation() + : (Last + 1)->location(); + SourceRange Range = spelledForExpandedSlow( + First->location(), Last->location(), Prev, Next, FID, *SourceMgr); + if (Range.isInvalid()) + return std::nullopt; + return getTokensCovering(File.SpelledTokens, Range, *SourceMgr); } + // Otherwise, use the fast version based on Mappings. // Do not allow changes that doesn't cover full expansion. - unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); - unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); - if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded) - return llvm::None; - if (LastMapping && LastMapping->EndExpanded != EndExpanded) - return llvm::None; - // All is good, return the result. - return llvm::makeArrayRef( - BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled - : BeginSpelled, + unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data(); + unsigned LastExpanded = Expanded.end() - ExpandedTokens.data(); + if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded) + return std::nullopt; + if (LastMapping && LastMapping->EndExpanded != LastExpanded) + return std::nullopt; + return llvm::ArrayRef( + FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled + : FirstSpelled, LastMapping ? 
File.SpelledTokens.data() + LastMapping->EndSpelled : LastSpelled + 1); } @@ -415,10 +506,10 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F, const Mapping &M) const { Expansion E; - E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled, - F.SpelledTokens.data() + M.EndSpelled); - E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded, - ExpandedTokens.data() + M.EndExpanded); + E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled, + F.SpelledTokens.data() + M.EndSpelled); + E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded, + ExpandedTokens.data() + M.EndExpanded); return E; } @@ -441,7 +532,7 @@ TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { return File; } -llvm::Optional<TokenBuffer::Expansion> +std::optional<TokenBuffer::Expansion> TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { assert(Spelled); const auto &File = fileForSpelled(*Spelled); @@ -451,7 +542,7 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { return M.BeginSpelled < SpelledIndex; }); if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex) - return llvm::None; + return std::nullopt; return makeExpansion(File, *M); } @@ -483,8 +574,8 @@ syntax::spelledTokensTouching(SourceLocation Loc, bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; bool AcceptLeft = Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; - return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), - Right + (AcceptRight ? 1 : 0)); + return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0), + Right + (AcceptRight ? 1 : 0)); } llvm::ArrayRef<syntax::Token> @@ -714,7 +805,7 @@ private: // In the simplest case, skips spelled tokens until finding one that produced // the NextExpanded token, and creates an empty mapping for them. 
// If Drain is provided, skips remaining tokens from that file instead. - void discard(llvm::Optional<FileID> Drain = llvm::None) { + void discard(std::optional<FileID> Drain = std::nullopt) { SourceLocation Target = Drain ? SM.getLocForEndOfFile(*Drain) : SM.getExpansionLoc( @@ -751,7 +842,7 @@ private: SpelledTokens[NextSpelled].location() <= KnownEnd) ++NextSpelled; FlushMapping(); // Emits [NextSpelled, KnownEnd] - // Now the loop contitues and will emit (KnownEnd, Target). + // Now the loop continues and will emit (KnownEnd, Target). } else { ++NextSpelled; } @@ -891,7 +982,7 @@ std::string TokenBuffer::dumpForTests() const { OS << "expanded tokens:\n" << " "; // (!) we do not show '<eof>'. - DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back()); + DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back()); OS << "\n"; std::vector<FileID> Keys; |