1 files changed, 175 insertions, 84 deletions
diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp
index e2014f965c90..b13dc9ef4aee 100644
--- a/clang/lib/Tooling/Syntax/Tokens.cpp
+++ b/clang/lib/Tooling/Syntax/Tokens.cpp
@@ -18,8 +18,6 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -28,6 +26,7 @@
 #include <algorithm>
 #include <cassert>
 #include <iterator>
+#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
@@ -55,45 +54,140 @@ getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
   return {Begin, End};
 }
 
-// Finds the smallest expansion range that contains expanded tokens First and
-// Last, e.g.:
+// Finds the range in TargetFile covering expanded tokens [First, Last].
+// Prev precedes First and Next follows Last; these must *not* be included.
+// If no range satisfies the criteria, returns an invalid range.
+//
 // #define ID(x) x
 // ID(ID(ID(a1) a2))
 //          ~~       -> a1
 //              ~~   -> a2
 //       ~~~~~~~~~   -> a1 a2
-SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
-                                        const syntax::Token &Last,
-                                        const SourceManager &SM) {
-  SourceRange Res;
-  auto FirstLoc = First.location(), LastLoc = Last.location();
-  // Keep traversing up the spelling chain as longs as tokens are part of the
-  // same expansion.
-  while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
-    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
-    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
-    // Stop if expansions have diverged.
-    if (ExpInfoFirst.getExpansionLocStart() !=
-        ExpInfoLast.getExpansionLocStart())
+SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
+                                   SourceLocation Prev, SourceLocation Next,
+                                   FileID TargetFile,
+                                   const SourceManager &SM) {
+  // There are two main parts to this algorithm:
+  //  - identifying which spelled range covers the expanded tokens
+  //  - validating that this range doesn't cover any extra tokens (Prev/Next)
+  //
+  // We do these in order. However as we transform the expanded range into the
+  // spelled one, we adjust Prev/Next too so the validation remains simple.
+
+  assert(SM.getSLocEntry(TargetFile).isFile());
+  // In most cases, to select First and Last we must return their expansion
+  // range, i.e. the whole of any macros they are included in.
+  //
+  // When First and Last are part of the *same macro arg* of a macro written
+  // in TargetFile, we want that slice of the arg, i.e. their spelling range.
+  //
+  // Unwrap such macro calls. If the target file has A(B(C)), the
+  // SourceLocation stack of a token inside C shows us the expansion of A first,
+  // then B, then any macros inside C's body, then C itself.
+  // (This is the reverse of the order the PP applies the expansions in).
+  while (First.isMacroID() && Last.isMacroID()) {
+    auto DecFirst = SM.getDecomposedLoc(First);
+    auto DecLast = SM.getDecomposedLoc(Last);
+    auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
+    auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();
+
+    if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
+      break;
+    // Locations are in the same macro arg if they expand to the same place.
+    // (They may still have different FileIDs - an arg can have >1 chunks!)
+    if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
       break;
-    // Do not continue into macro bodies.
-    if (!ExpInfoFirst.isMacroArgExpansion() ||
-        !ExpInfoLast.isMacroArgExpansion())
+    // Careful, given:
+    //   #define HIDE ID(ID(a))
+    //   ID(ID(HIDE))
+    // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
+    // We distinguish them by whether the macro expands into the target file.
+    // Fortunately, the target file ones will always appear first.
+    auto &ExpMacro =
+        SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart()))
+            .getExpansion();
+    if (ExpMacro.getExpansionLocStart().isMacroID())
       break;
-    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
-    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
-    // Update the result afterwards, as we want the tokens that triggered the
-    // expansion.
-    Res = {FirstLoc, LastLoc};
+    // Replace each endpoint with its spelling inside the macro arg.
+    // (This is getImmediateSpellingLoc without repeating lookups).
+    First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
+    Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
+
+    // Now: how do we adjust the previous/next bounds? Three cases:
+    // A) If they are also part of the same macro arg, we translate them too.
+    //   This will ensure that we don't select any macros nested within the
+    //   macro arg that cover extra tokens. Critical case:
+    //      #define ID(X) X
+    //      ID(prev target) // selecting 'target' succeeds
+    //      #define LARGE ID(prev target)
+    //      LARGE // selecting 'target' fails.
+    // B) They are not in the macro at all, then their expansion range is a
+    //    sibling to it, and we can safely substitute that.
+    //      #define PREV prev
+    //      #define ID(X) X
+    //      PREV ID(target) // selecting 'target' succeeds.
+    //      #define LARGE PREV ID(target)
+    //      LARGE // selecting 'target' fails.
+    // C) They are in a different arg of this macro, or the macro body.
+    //    Now selecting the whole macro arg is fine, but the whole macro is not.
+    //    Model this by using the edge of the macro call as the bound.
+    //      #define ID2(X, Y) X Y
+    //      ID2(prev, target) // selecting 'target' succeeds
+    //      #define LARGE ID2(prev, target)
+    //      LARGE // selecting 'target' fails
+    auto AdjustBound = [&](SourceLocation &Bound) {
+      if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B.
+        return;
+      auto DecBound = SM.getDecomposedLoc(Bound);
+      auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion();
+      if (ExpBound.isMacroArgExpansion() &&
+          ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) {
+        // Case A: translate to (spelling) loc within the macro arg.
+        Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second);
+        return;
+      }
+      while (Bound.isMacroID()) {
+        SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange();
+        if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) {
+          // Case B: bounds become the macro call itself.
+          Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd();
+          return;
+        }
+        // Either case C, or expansion location will later find case B.
+        // We choose the upper bound for Prev and the lower one for Next:
+        //   ID(prev) target ID(next)
+        //          ^        ^
+        //      new-prev  new-next
+        Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin();
+      }
+    };
+    AdjustBound(Prev);
+    AdjustBound(Next);
   }
-  // Normally mapping back to expansion location here only changes FileID, as
-  // we've already found some tokens expanded from the same macro argument, and
-  // they should map to a consecutive subset of spelled tokens. Unfortunately
-  // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations
-  // based on their FileID in addition to offsets. So even though we are
-  // referring to same tokens, SourceManager might tell us that one is before
-  // the other if they've got different FileIDs.
-  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
+
+  // In all remaining cases we need the full containing macros.
+  // If this overlaps Prev or Next, then no range is possible.
+  SourceRange Candidate =
+      SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
+  auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
+  auto DecLast = SM.getDecomposedLoc(Candidate.getEnd());
+  // Can end up in the wrong file due to bad input or token-pasting shenanigans.
+  if (Candidate.isInvalid() || DecFirst.first != TargetFile || DecLast.first != TargetFile)
+    return SourceRange();
+  // Check bounds, which may still be inside macros.
+  if (Prev.isValid()) {
+    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
+    if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
+      return SourceRange();
+  }
+  if (Next.isValid()) {
+    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
+    if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
+      return SourceRange();
+  }
+  // Now we know that Candidate is a file range that covers [First, Last]
+  // without encroaching on {Prev, Next}. Ship it!
+  return Candidate;
 }
 
 } // namespace
@@ -331,8 +425,8 @@ TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
   // Avoid returning empty ranges.
   if (ExpandedBegin == ExpandedEnd)
     return {};
-  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
-                             ExpandedTokens.data() + ExpandedEnd)};
+  return {llvm::ArrayRef(ExpandedTokens.data() + ExpandedBegin,
+                         ExpandedTokens.data() + ExpandedEnd)};
 }
 
 llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
@@ -357,57 +451,54 @@ std::string TokenBuffer::Mapping::str() const {
                     BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
 }
 
-llvm::Optional<llvm::ArrayRef<syntax::Token>>
+std::optional<llvm::ArrayRef<syntax::Token>>
 TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
   // Mapping an empty range is ambiguous in case of empty mappings at either end
   // of the range, bail out in that case.
   if (Expanded.empty())
-    return llvm::None;
-
-  const syntax::Token *BeginSpelled;
-  const Mapping *BeginMapping;
-  std::tie(BeginSpelled, BeginMapping) =
-      spelledForExpandedToken(&Expanded.front());
-
-  const syntax::Token *LastSpelled;
-  const Mapping *LastMapping;
-  std::tie(LastSpelled, LastMapping) =
-      spelledForExpandedToken(&Expanded.back());
+    return std::nullopt;
+  const syntax::Token *First = &Expanded.front();
+  const syntax::Token *Last = &Expanded.back();
+  auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First);
+  auto [LastSpelled, LastMapping] = spelledForExpandedToken(Last);
 
-  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
+  FileID FID = SourceMgr->getFileID(FirstSpelled->location());
   // FIXME: Handle multi-file changes by trying to map onto a common root.
   if (FID != SourceMgr->getFileID(LastSpelled->location()))
-    return llvm::None;
+    return std::nullopt;
 
   const MarkedFile &File = Files.find(FID)->second;
 
-  // If both tokens are coming from a macro argument expansion, try and map to
-  // smallest part of the macro argument. BeginMapping && LastMapping check is
-  // only for performance, they are a prerequisite for Expanded.front() and
-  // Expanded.back() being part of a macro arg expansion.
-  if (BeginMapping && LastMapping &&
-      SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
-      SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
-    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
-                                                   Expanded.back(), *SourceMgr);
-    // It might be the case that tokens are arguments of different macro calls,
-    // in that case we should continue with the logic below instead of returning
-    // an empty range.
-    if (CommonRange.isValid())
-      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
+  // If the range is within one macro argument, the result may be only part of a
+  // Mapping. We must use the general (SourceManager-based) algorithm.
+  if (FirstMapping && FirstMapping == LastMapping &&
+      SourceMgr->isMacroArgExpansion(First->location()) &&
+      SourceMgr->isMacroArgExpansion(Last->location())) {
+    // We use the excluded Prev/Next tokens for bounds checking.
+    SourceLocation Prev = (First == &ExpandedTokens.front())
+                              ? SourceLocation()
+                              : (First - 1)->location();
+    SourceLocation Next = (Last == &ExpandedTokens.back())
+                              ? SourceLocation()
+                              : (Last + 1)->location();
+    SourceRange Range = spelledForExpandedSlow(
+        First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
+    if (Range.isInvalid())
+      return std::nullopt;
+    return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
   }
 
+  // Otherwise, use the fast version based on Mappings.
   // Do not allow changes that doesn't cover full expansion.
-  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
-  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
-  if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
-    return llvm::None;
-  if (LastMapping && LastMapping->EndExpanded != EndExpanded)
-    return llvm::None;
-  // All is good, return the result.
-  return llvm::makeArrayRef(
-      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
-                   : BeginSpelled,
+  unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
+  unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
+  if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
+    return std::nullopt;
+  if (LastMapping && LastMapping->EndExpanded != LastExpanded)
+    return std::nullopt;
+  return llvm::ArrayRef(
+      FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
+                   : FirstSpelled,
       LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
                   : LastSpelled + 1);
 }
@@ -415,10 +506,10 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
 TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
                                                   const Mapping &M) const {
   Expansion E;
-  E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
-                                 F.SpelledTokens.data() + M.EndSpelled);
-  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
-                                  ExpandedTokens.data() + M.EndExpanded);
+  E.Spelled = llvm::ArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
+                             F.SpelledTokens.data() + M.EndSpelled);
+  E.Expanded = llvm::ArrayRef(ExpandedTokens.data() + M.BeginExpanded,
+                              ExpandedTokens.data() + M.EndExpanded);
   return E;
 }
 
@@ -441,7 +532,7 @@ TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
   return File;
 }
 
-llvm::Optional<TokenBuffer::Expansion>
+std::optional<TokenBuffer::Expansion>
 TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
   assert(Spelled);
   const auto &File = fileForSpelled(*Spelled);
@@ -451,7 +542,7 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
     return M.BeginSpelled < SpelledIndex;
   });
   if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
-    return llvm::None;
+    return std::nullopt;
   return makeExpansion(File, *M);
 }
 
@@ -483,8 +574,8 @@ syntax::spelledTokensTouching(SourceLocation Loc,
   bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
   bool AcceptLeft =
       Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
-  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
-                            Right + (AcceptRight ? 1 : 0));
+  return llvm::ArrayRef(Right - (AcceptLeft ? 1 : 0),
+                        Right + (AcceptRight ? 1 : 0));
 }
 
 llvm::ArrayRef<syntax::Token>
@@ -714,7 +805,7 @@ private:
   // In the simplest case, skips spelled tokens until finding one that produced
   // the NextExpanded token, and creates an empty mapping for them.
   // If Drain is provided, skips remaining tokens from that file instead.
-  void discard(llvm::Optional<FileID> Drain = llvm::None) {
+  void discard(std::optional<FileID> Drain = std::nullopt) {
     SourceLocation Target =
         Drain ? SM.getLocForEndOfFile(*Drain)
               : SM.getExpansionLoc(
@@ -751,7 +842,7 @@ private:
                SpelledTokens[NextSpelled].location() <= KnownEnd)
           ++NextSpelled;
         FlushMapping(); // Emits [NextSpelled, KnownEnd]
-        // Now the loop contitues and will emit (KnownEnd, Target).
+        // Now the loop continues and will emit (KnownEnd, Target).
       } else {
         ++NextSpelled;
       }
@@ -891,7 +982,7 @@ std::string TokenBuffer::dumpForTests() const {
   OS << "expanded tokens:\n"
      << "  ";
   // (!) we do not show '<eof>'.
-  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
+  DumpTokens(OS, llvm::ArrayRef(ExpandedTokens).drop_back());
   OS << "\n";
 
   std::vector<FileID> Keys;