diff options
Diffstat (limited to 'clang/lib/Tooling/Transformer')
-rw-r--r-- | clang/lib/Tooling/Transformer/Parsing.cpp | 279 | ||||
-rw-r--r-- | clang/lib/Tooling/Transformer/RangeSelector.cpp | 9 | ||||
-rw-r--r-- | clang/lib/Tooling/Transformer/RewriteRule.cpp | 68 | ||||
-rw-r--r-- | clang/lib/Tooling/Transformer/SourceCode.cpp | 370 | ||||
-rw-r--r-- | clang/lib/Tooling/Transformer/Stencil.cpp | 66 | ||||
-rw-r--r-- | clang/lib/Tooling/Transformer/Transformer.cpp | 44 |
6 files changed, 754 insertions, 82 deletions
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp new file mode 100644 index 0000000000000..1579115b93138 --- /dev/null +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -0,0 +1,279 @@ +//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Transformer/Parsing.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/SourceCode.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <string> +#include <utility> +#include <vector> + +using namespace clang; +using namespace transformer; + +// FIXME: This implementation is entirely separate from that of the AST +// matchers. Given the similarity of the languages and uses of the two parsers, +// the two should share a common parsing infrastructure, as should other +// Transformer types. We intend to unify this implementation soon to share as +// much as possible with the AST Matchers parsing. + +namespace { +using llvm::Error; +using llvm::Expected; + +template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); + +struct ParseState { + // The remaining input to be processed. + StringRef Input; + // The original input. Not modified during parsing; only for reference in + // error reporting. 
+ StringRef OriginalInput; +}; + +// Represents an intermediate result returned by a parsing function. Functions +// that don't generate values should use `llvm::None` +template <typename ResultType> struct ParseProgress { + ParseState State; + // Intermediate result generated by the Parser. + ResultType Value; +}; + +template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; +template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); + +class ParseError : public llvm::ErrorInfo<ParseError> { +public: + // Required field for all ErrorInfo derivatives. + static char ID; + + ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) + : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), + Excerpt(std::move(InputExcerpt)) {} + + void log(llvm::raw_ostream &OS) const override { + OS << "parse error at position (" << Pos << "): " << ErrorMsg + << ": " + Excerpt; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + // Position of the error in the input string. + size_t Pos; + std::string ErrorMsg; + // Excerpt of the input starting at the error position. 
+ std::string Excerpt; +}; + +char ParseError::ID; +} // namespace + +static const llvm::StringMap<RangeSelectorOp<std::string>> & +getUnaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string>> M = { + {"name", name}, + {"node", node}, + {"statement", statement}, + {"statements", statements}, + {"member", member}, + {"callArgs", callArgs}, + {"elseBranch", elseBranch}, + {"initListElements", initListElements}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & +getUnaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { + {"before", before}, {"after", after}, {"expansion", expansion}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & +getBinaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { + {"encloseNodes", range}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & +getBinaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> + M = {{"enclose", range}}; + return M; +} + +template <typename Element> +llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, + llvm::StringRef Key) { + auto it = Map.find(Key); + if (it == Map.end()) + return llvm::None; + return it->second; +} + +template <typename ResultType> +ParseProgress<ResultType> makeParseProgress(ParseState State, + ResultType Result) { + return ParseProgress<ResultType>{State, std::move(Result)}; +} + +static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { + size_t Pos = S.OriginalInput.size() - S.Input.size(); + return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), + S.OriginalInput.substr(Pos, 20).str()); +} + +// Returns a new ParseState that advances \c S by \c N characters. 
+static ParseState advance(ParseState S, size_t N) { + S.Input = S.Input.drop_front(N); + return S; +} + +static StringRef consumeWhitespace(StringRef S) { + return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); +} + +// Parses a single expected character \c c from \c State, skipping preceding +// whitespace. Error if the expected character isn't found. +static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty() || State.Input.front() != c) + return makeParseError(State, + ("expected char not found: " + llvm::Twine(c)).str()); + return makeParseProgress(advance(State, 1), llvm::None); +} + +// Parses an identifier "token" -- handles preceding whitespace. +static ExpectedProgress<std::string> parseId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + auto Id = State.Input.take_while( + [](char c) { return c >= 0 && isIdentifierBody(c); }); + if (Id.empty()) + return makeParseError(State, "failed to parse name"); + return makeParseProgress(advance(State, Id.size()), Id.str()); +} + +// For consistency with the AST matcher parser and C++ code, node ids are +// written as strings. However, we do not support escaping in the string. +static ExpectedProgress<std::string> parseStringId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return makeParseError(State, "unexpected end of input"); + if (!State.Input.consume_front("\"")) + return makeParseError( + State, + "expecting string, but encountered other character or end of input"); + + StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); + if (State.Input.size() == Id.size()) + return makeParseError(State, "unterminated string"); + // Advance past the trailing quote as well. + return makeParseProgress(advance(State, Id.size() + 1), Id.str()); +} + +// Parses a single element surrounded by parens. 
`Op` is applied to the parsed +// result to create the result of this function call. +template <typename T> +ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, + RangeSelectorOp<T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto E = ParseElement(P->State); + if (!E) + return E.takeError(); + + P = parseChar(')', E->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, Op(std::move(E->Value))); +} + +// Parses a pair of elements surrounded by parens and separated by comma. `Op` +// is applied to the parsed results to create the result of this function call. +template <typename T> +ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, + RangeSelectorOp<T, T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto Left = ParseElement(P->State); + if (!Left) + return Left.takeError(); + + P = parseChar(',', Left->State); + if (!P) + return P.takeError(); + + auto Right = ParseElement(P->State); + if (!Right) + return Right.takeError(); + + P = parseChar(')', Right->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, + Op(std::move(Left->Value), std::move(Right->Value))); +} + +// Parses a range selector: an operator name followed by its parenthesized +// argument(s): quoted node ids or nested range selectors. Returns the parsed +// RangeSelector on success and an error if the input fails to parse. 
+static ExpectedProgress<RangeSelector> +parseRangeSelectorImpl(ParseState State) { + auto Id = parseId(State); + if (!Id) + return Id.takeError(); + + std::string OpName = std::move(Id->Value); + if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) + return parseSingle(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) + return parseSingle(parseRangeSelectorImpl, *Op, Id->State); + + if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) + return parsePair(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) + return parsePair(parseRangeSelectorImpl, *Op, Id->State); + + return makeParseError(State, "unknown selector name: " + OpName); +} + +Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { + ParseState State = {Input, Input}; + ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); + if (!Result) + return Result.takeError(); + State = Result->State; + // Discard any potentially trailing whitespace. 
+ State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return Result->Value; + return makeParseError(State, "unexpected input after selector"); +} diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 9f81423c9022b..29b1a5b0372ea 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -23,8 +23,6 @@ using namespace clang; using namespace transformer; using ast_matchers::MatchFinder; -using ast_type_traits::ASTNodeKind; -using ast_type_traits::DynTypedNode; using llvm::Error; using llvm::StringError; @@ -148,7 +146,7 @@ RangeSelector transformer::statement(std::string ID) { }; } -RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) { +RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { Expected<CharSourceRange> BeginRange = Begin(Result); if (!BeginRange) @@ -167,8 +165,9 @@ RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) { }; } -RangeSelector transformer::range(std::string BeginID, std::string EndID) { - return transformer::range(node(std::move(BeginID)), node(std::move(EndID))); +RangeSelector transformer::encloseNodes(std::string BeginID, + std::string EndID) { + return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID))); } RangeSelector transformer::member(std::string ID) { diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp index 20d3a371950af..995bec03cd669 100644 --- a/clang/lib/Tooling/Transformer/RewriteRule.cpp +++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp @@ -25,16 +25,14 @@ using namespace transformer; using ast_matchers::MatchFinder; using ast_matchers::internal::DynTypedMatcher; -using ast_type_traits::ASTNodeKind; using MatchResult = MatchFinder::MatchResult; 
-Expected<SmallVector<transformer::detail::Transformation, 1>> -transformer::detail::translateEdits(const MatchResult &Result, - llvm::ArrayRef<ASTEdit> Edits) { - SmallVector<transformer::detail::Transformation, 1> Transformations; - for (const auto &Edit : Edits) { - Expected<CharSourceRange> Range = Edit.TargetRange(Result); +static Expected<SmallVector<transformer::Edit, 1>> +translateEdits(const MatchResult &Result, ArrayRef<ASTEdit> ASTEdits) { + SmallVector<transformer::Edit, 1> Edits; + for (const auto &E : ASTEdits) { + Expected<CharSourceRange> Range = E.TargetRange(Result); if (!Range) return Range.takeError(); llvm::Optional<CharSourceRange> EditRange = @@ -42,21 +40,34 @@ transformer::detail::translateEdits(const MatchResult &Result, // FIXME: let user specify whether to treat this case as an error or ignore // it as is currently done. if (!EditRange) - return SmallVector<Transformation, 0>(); - auto Replacement = Edit.Replacement->eval(Result); + return SmallVector<Edit, 0>(); + auto Replacement = E.Replacement->eval(Result); if (!Replacement) return Replacement.takeError(); - transformer::detail::Transformation T; + transformer::Edit T; T.Range = *EditRange; T.Replacement = std::move(*Replacement); - Transformations.push_back(std::move(T)); + T.Metadata = E.Metadata; + Edits.push_back(std::move(T)); } - return Transformations; + return Edits; } -ASTEdit transformer::changeTo(RangeSelector S, TextGenerator Replacement) { +EditGenerator transformer::editList(SmallVector<ASTEdit, 1> Edits) { + return [Edits = std::move(Edits)](const MatchResult &Result) { + return translateEdits(Result, Edits); + }; +} + +EditGenerator transformer::edit(ASTEdit Edit) { + return [Edit = std::move(Edit)](const MatchResult &Result) { + return translateEdits(Result, {Edit}); + }; +} + +ASTEdit transformer::changeTo(RangeSelector Target, TextGenerator Replacement) { ASTEdit E; - E.TargetRange = std::move(S); + E.TargetRange = std::move(Target); E.Replacement = 
std::move(Replacement); return E; } @@ -83,8 +94,9 @@ ASTEdit transformer::remove(RangeSelector S) { return change(std::move(S), std::make_shared<SimpleTextGenerator>("")); } -RewriteRule transformer::makeRule(DynTypedMatcher M, SmallVector<ASTEdit, 1> Edits, - TextGenerator Explanation) { +RewriteRule transformer::makeRule(ast_matchers::internal::DynTypedMatcher M, + EditGenerator Edits, + TextGenerator Explanation) { return RewriteRule{{RewriteRule::Case{ std::move(M), std::move(Edits), std::move(Explanation), {}}}}; } @@ -105,10 +117,13 @@ static bool hasValidKind(const DynTypedMatcher &M) { #endif // Binds each rule's matcher to a unique (and deterministic) tag based on -// `TagBase` and the id paired with the case. +// `TagBase` and the id paired with the case. All of the returned matchers have +// their traversal kind explicitly set, either based on a pre-set kind or to the +// provided `DefaultTraversalKind`. static std::vector<DynTypedMatcher> taggedMatchers( StringRef TagBase, - const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases) { + const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases, + ast_type_traits::TraversalKind DefaultTraversalKind) { std::vector<DynTypedMatcher> Matchers; Matchers.reserve(Cases.size()); for (const auto &Case : Cases) { @@ -116,8 +131,10 @@ static std::vector<DynTypedMatcher> taggedMatchers( // HACK: Many matchers are not bindable, so ensure that tryBind will work. DynTypedMatcher BoundMatcher(Case.second.Matcher); BoundMatcher.setAllowBind(true); - auto M = BoundMatcher.tryBind(Tag); - Matchers.push_back(*std::move(M)); + auto M = *BoundMatcher.tryBind(Tag); + Matchers.push_back(!M.getTraversalKind() + ? M.withTraversalKind(DefaultTraversalKind) + : std::move(M)); } return Matchers; } @@ -147,14 +164,21 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) { Buckets[Cases[I].Matcher.getSupportedKind()].emplace_back(I, Cases[I]); } + // Each anyOf explicitly controls the traversal kind. 
The anyOf itself is set + // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind + // of the branches. Then, each branch is either left as is, if the kind is + // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. We + // choose this setting, because we think it is the one most friendly to + // beginners, who are (largely) the target audience of Transformer. std::vector<DynTypedMatcher> Matchers; for (const auto &Bucket : Buckets) { DynTypedMatcher M = DynTypedMatcher::constructVariadic( DynTypedMatcher::VO_AnyOf, Bucket.first, - taggedMatchers("Tag", Bucket.second)); + taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource)); M.setAllowBind(true); // `tryBind` is guaranteed to succeed, because `AllowBind` was set to true. - Matchers.push_back(*M.tryBind(RewriteRule::RootID)); + Matchers.push_back( + M.tryBind(RewriteRule::RootID)->withTraversalKind(TK_AsIs)); } return Matchers; } diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp index 836401d1e605c..26b204851f058 100644 --- a/clang/lib/Tooling/Transformer/SourceCode.cpp +++ b/clang/lib/Tooling/Transformer/SourceCode.cpp @@ -10,10 +10,24 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Transformer/SourceCode.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Comment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <set> using namespace clang; +using llvm::errc; +using llvm::StringError; + StringRef clang::tooling::getText(CharSourceRange Range, const ASTContext &Context) { return Lexer::getSourceText(Range, Context.getSourceManager(), @@ -23,11 +37,45 @@ StringRef 
clang::tooling::getText(CharSourceRange Range, CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range, tok::TokenKind Next, ASTContext &Context) { - Optional<Token> Tok = Lexer::findNextToken( - Range.getEnd(), Context.getSourceManager(), Context.getLangOpts()); - if (!Tok || !Tok->is(Next)) + CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(), + Context.getLangOpts()); + if (R.isInvalid()) + return Range; + Token Tok; + bool Err = + Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(), + Context.getLangOpts(), /*IgnoreWhiteSpace=*/true); + if (Err || !Tok.is(Next)) return Range; - return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation()); + return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation()); +} + +llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range, + const SourceManager &SM) { + if (Range.isInvalid()) + return llvm::make_error<StringError>(errc::invalid_argument, + "Invalid range"); + + if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range starts or ends in a macro expansion"); + + if (SM.isInSystemHeader(Range.getBegin()) || + SM.isInSystemHeader(Range.getEnd())) + return llvm::make_error<StringError>(errc::invalid_argument, + "Range is in system header"); + + std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd()); + if (BeginInfo.first != EndInfo.first) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range begins and ends in different files"); + + if (BeginInfo.second > EndInfo.second) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range's begin is past its end"); + + return llvm::Error::success(); } llvm::Optional<CharSourceRange> @@ -46,20 +94,308 @@ clang::tooling::getRangeForEdit(const CharSourceRange &EditRange, // 
foo(DO_NOTHING(6)) // Decide whether the current behavior is desirable and modify if not. CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts); - if (Range.isInvalid()) - return None; + bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM)); + if (IsInvalid) + return llvm::None; + return Range; - if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) - return None; - if (SM.isInSystemHeader(Range.getBegin()) || - SM.isInSystemHeader(Range.getEnd())) - return None; +} - std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin()); - std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd()); - if (BeginInfo.first != EndInfo.first || - BeginInfo.second > EndInfo.second) - return None; +static bool startsWithNewline(const SourceManager &SM, const Token &Tok) { + return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]); +} - return Range; +static bool contains(const std::set<tok::TokenKind> &Terminators, + const Token &Tok) { + return Terminators.count(Tok.getKind()) > 0; +} + +// Returns the exclusive, *file* end location of the entity whose last token is +// at location 'EntityLast'. That is, it returns the location one past the last +// relevant character. +// +// Associated tokens include comments, horizontal whitespace and 'Terminators' +// -- optional tokens, which, if any are found, will be included; if +// 'Terminators' is empty, we will not include any extra tokens beyond comments +// and horizontal whitespace. +static SourceLocation +getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast, + const std::set<tok::TokenKind> &Terminators, + const LangOptions &LangOpts) { + assert(EntityLast.isValid() && "Invalid end location found."); + + // We remember the last location of a non-horizontal-whitespace token we have + // lexed; this is the location up to which we will want to delete. 
+ // FIXME: Support using the spelling loc here for cases where we want to + // analyze the macro text. + + CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast); + // FIXME: Should check isTokenRange(), for the (rare) case that + // `ExpansionRange` is a character range. + std::unique_ptr<Lexer> Lexer = [&]() { + bool Invalid = false; + auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd()); + llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid); + assert(!Invalid && "Cannot get file/offset"); + return std::make_unique<clang::Lexer>( + SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(), + File.data() + FileOffset.second, File.end()); + }(); + + // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown). + Lexer->SetKeepWhitespaceMode(true); + + // Generally, the code we want to include looks like this ([] are optional), + // If Terminators is empty: + // [ <comment> ] [ <newline> ] + // Otherwise: + // ... <terminator> [ <comment> ] [ <newline> ] + + Token Tok; + bool Terminated = false; + + // First, lex to the current token (which is the last token of the range that + // is definitely associated with the decl). Then, we process the first token + // separately from the rest based on conditions that hold specifically for + // that first token. + // + // We do not search for a terminator if none is required or we've already + // encountered it. Otherwise, if the original `EntityLast` location was in a + // macro expansion, we don't have visibility into the text, so we assume we've + // already terminated. However, we note this assumption with + // `TerminatedByMacro`, because we'll want to handle it somewhat differently + // for the terminators semicolon and comma. 
These terminators can be safely + // associated with the entity when they appear after the macro -- extra + // semicolons have no effect on the program and a well-formed program won't + // have multiple commas in a row, so we're guaranteed that there is only one. + // + // FIXME: This handling of macros is more conservative than necessary. When + // the end of the expansion coincides with the end of the node, we can still + // safely analyze the code. But, it is more complicated, because we need to + // start by lexing the spelling loc for the first token and then switch to the + // expansion loc. + bool TerminatedByMacro = false; + Lexer->LexFromRawLexer(Tok); + if (Terminators.empty() || contains(Terminators, Tok)) + Terminated = true; + else if (EntityLast.isMacroID()) { + Terminated = true; + TerminatedByMacro = true; + } + + // We save the most recent candidate for the exclusive end location. + SourceLocation End = Tok.getEndLoc(); + + while (!Terminated) { + // Lex the next token we want to possibly expand the range with. + Lexer->LexFromRawLexer(Tok); + + switch (Tok.getKind()) { + case tok::eof: + // Unexpected separators. + case tok::l_brace: + case tok::r_brace: + case tok::comma: + return End; + // Whitespace pseudo-tokens. + case tok::unknown: + if (startsWithNewline(SM, Tok)) + // Include at least until the end of the line. + End = Tok.getEndLoc(); + break; + default: + if (contains(Terminators, Tok)) + Terminated = true; + End = Tok.getEndLoc(); + break; + } + } + + do { + // Lex the next token we want to possibly expand the range with. + Lexer->LexFromRawLexer(Tok); + + switch (Tok.getKind()) { + case tok::unknown: + if (startsWithNewline(SM, Tok)) + // We're done, but include this newline. + return Tok.getEndLoc(); + break; + case tok::comment: + // Include any comments we find on the way. 
+ End = Tok.getEndLoc(); + break; + case tok::semi: + case tok::comma: + if (TerminatedByMacro && contains(Terminators, Tok)) { + End = Tok.getEndLoc(); + // We've found a real terminator. + TerminatedByMacro = false; + break; + } + // Found an unrelated token; stop and don't include it. + return End; + default: + // Found an unrelated token; stop and don't include it. + return End; + } + } while (true); +} + +// Returns the expected terminator tokens for the given declaration. +// +// If we do not know the correct terminator token, returns an empty set. +// +// There are cases where we have more than one possible terminator (for example, +// we find either a comma or a semicolon after a VarDecl). +static std::set<tok::TokenKind> getTerminators(const Decl &D) { + if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D)) + return {tok::semi}; + + if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D)) + return {tok::r_brace, tok::semi}; + + if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D)) + return {tok::comma, tok::semi}; + + return {}; +} + +// Starting from `Loc`, skips whitespace up to, and including, a single +// newline. Returns the (exclusive) end of any skipped whitespace (that is, the +// location immediately after the whitespace). +static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM, + SourceLocation Loc, + const LangOptions &LangOpts) { + const char *LocChars = SM.getCharacterData(Loc); + int i = 0; + while (isHorizontalWhitespace(LocChars[i])) + ++i; + if (isVerticalWhitespace(LocChars[i])) + ++i; + return Loc.getLocWithOffset(i); +} + +// Is `Loc` separated from any following decl by something meaningful (e.g. an +// empty line, a comment), ignoring horizontal whitespace? Since this is a +// heuristic, we return false when in doubt. `Loc` cannot be the first location +// in the file. 
+static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc, + const LangOptions &LangOpts) { + // If the preceding character is a newline, we'll check for an empty line as a + // separator. However, we can't identify an empty line using tokens, so we + // analyse the characters. If we try to use tokens, we'll just end up with a + // whitespace token, whose characters we'd have to analyse anyhow. + bool Invalid = false; + const char *LocChars = + SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid); + assert(!Invalid && + "Loc must be a valid character and not the first of the source file."); + if (isVerticalWhitespace(LocChars[0])) { + for (int i = 1; isWhitespace(LocChars[i]); ++i) + if (isVerticalWhitespace(LocChars[i])) + return true; + } + // We didn't find an empty line, so lex the next token, skipping past any + // whitespace we just scanned. + Token Tok; + bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts, + /*IgnoreWhiteSpace=*/true); + if (Failed) + // Any text that confuses the lexer seems fair to consider a separation. + return true; + + switch (Tok.getKind()) { + case tok::comment: + case tok::l_brace: + case tok::r_brace: + case tok::eof: + return true; + default: + return false; + } +} + +CharSourceRange tooling::getAssociatedRange(const Decl &Decl, + ASTContext &Context) { + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange()); + + // First, expand to the start of the template<> declaration if necessary. 
+ if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) { + if (const auto *T = Record->getDescribedClassTemplate()) + if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin())) + Range.setBegin(T->getBeginLoc()); + } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) { + if (const auto *T = F->getDescribedFunctionTemplate()) + if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin())) + Range.setBegin(T->getBeginLoc()); + } + + // Next, expand the end location past trailing comments to include a potential + // newline at the end of the decl's line. + Range.setEnd( + getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts)); + Range.setTokenRange(false); + + // Expand to include preceding associated comments. We ignore any comments + // that are not preceding the decl, since we've already skipped trailing + // comments with getEntityEndLoc. + if (const RawComment *Comment = + Decl.getASTContext().getRawCommentForDeclNoCache(&Decl)) + // Only include a preceding comment if: + // * it is *not* separate from the declaration (not including any newline + // that immediately follows the comment), + // * the decl *is* separate from any following entity (so, there are no + // other entities the comment could refer to), and + // * it is not an IfThisThenThat lint check. + if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(), + Range.getBegin()) && + !atOrBeforeSeparation( + SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts), + LangOpts) && + atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) { + const StringRef CommentText = Comment->getRawText(SM); + if (!CommentText.contains("LINT.IfChange") && + !CommentText.contains("LINT.ThenChange")) + Range.setBegin(Comment->getBeginLoc()); + } + // Add leading attributes. 
+ for (auto *Attr : Decl.attrs()) { + if (Attr->getLocation().isInvalid() || + !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin())) + continue; + Range.setBegin(Attr->getLocation()); + + // Extend to the left '[[' or '__attribute__((' if we saw the attribute, + // unless it is not a valid location. + bool Invalid; + StringRef Source = + SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid); + if (Invalid) + continue; + llvm::StringRef BeforeAttr = + Source.substr(0, SM.getFileOffset(Range.getBegin())); + llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim(); + + for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) { + // Handle whitespace between attribute prefix and attribute value. + if (BeforeAttrStripped.endswith(Prefix)) { + // Move start to start position of prefix, which is + // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix) + // positions to the left. + Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>( + -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size()))); + break; + // If we didn't see '[[' or '__attribute' it's probably coming from a + // macro expansion which is already handled by makeFileCharRange(), + // below. + } + } + } + + // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But, + // Range.getBegin() may be inside an expansion. 
+ return Lexer::makeFileCharRange(Range, SM, LangOpts); } diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp index 8710e3cdf60f3..2670bf7adabf3 100644 --- a/clang/lib/Tooling/Transformer/Stencil.cpp +++ b/clang/lib/Tooling/Transformer/Stencil.cpp @@ -12,12 +12,14 @@ #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Transformer/SourceCode.h" #include "clang/Tooling/Transformer/SourceCodeBuilders.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include <atomic> #include <memory> #include <string> @@ -26,7 +28,6 @@ using namespace clang; using namespace transformer; using ast_matchers::MatchFinder; -using ast_type_traits::DynTypedNode; using llvm::errc; using llvm::Error; using llvm::Expected; @@ -81,14 +82,14 @@ struct SelectorData { // A stencil operation to build a member access `e.m` or `e->m`, as appropriate. 
struct AccessData { AccessData(StringRef BaseId, Stencil Member) - : BaseId(BaseId), Member(std::move(Member)) {} + : BaseId(std::string(BaseId)), Member(std::move(Member)) {} std::string BaseId; Stencil Member; }; struct IfBoundData { IfBoundData(StringRef Id, Stencil TrueStencil, Stencil FalseStencil) - : Id(Id), TrueStencil(std::move(TrueStencil)), + : Id(std::string(Id)), TrueStencil(std::move(TrueStencil)), FalseStencil(std::move(FalseStencil)) {} std::string Id; Stencil TrueStencil; @@ -227,10 +228,37 @@ Error evalData(const UnaryOperationData &Data, Error evalData(const SelectorData &Data, const MatchFinder::MatchResult &Match, std::string *Result) { - auto Range = Data.Selector(Match); - if (!Range) - return Range.takeError(); - *Result += tooling::getText(*Range, *Match.Context); + auto RawRange = Data.Selector(Match); + if (!RawRange) + return RawRange.takeError(); + CharSourceRange Range = Lexer::makeFileCharRange( + *RawRange, *Match.SourceManager, Match.Context->getLangOpts()); + if (Range.isInvalid()) { + // Validate the original range to attempt to get a meaningful error message. + // If it's valid, then something else is the cause and we just return the + // generic failure message. + if (auto Err = tooling::validateEditRange(*RawRange, *Match.SourceManager)) + return handleErrors(std::move(Err), [](std::unique_ptr<StringError> E) { + assert(E->convertToErrorCode() == + llvm::make_error_code(errc::invalid_argument) && + "Validation errors must carry the invalid_argument code"); + return llvm::createStringError( + errc::invalid_argument, + "selected range could not be resolved to a valid source range; " + + E->getMessage()); + }); + return llvm::createStringError( + errc::invalid_argument, + "selected range could not be resolved to a valid source range"); + } + // Validate `Range`, because `makeFileCharRange` accepts some ranges that + // `validateEditRange` rejects. 
+ if (auto Err = tooling::validateEditRange(Range, *Match.SourceManager)) + return joinErrors( + llvm::createStringError(errc::invalid_argument, + "selected range is not valid for editing"), + std::move(Err)); + *Result += tooling::getText(Range, *Match.Context); return Error::success(); } @@ -294,47 +322,41 @@ public: }; } // namespace -Stencil transformer::detail::makeStencil(StringRef Text) { return text(Text); } - -Stencil transformer::detail::makeStencil(RangeSelector Selector) { - return selection(std::move(Selector)); +Stencil transformer::detail::makeStencil(StringRef Text) { + return std::make_shared<StencilImpl<RawTextData>>(std::string(Text)); } -Stencil transformer::text(StringRef Text) { - return std::make_shared<StencilImpl<RawTextData>>(Text); -} - -Stencil transformer::selection(RangeSelector Selector) { +Stencil transformer::detail::makeStencil(RangeSelector Selector) { return std::make_shared<StencilImpl<SelectorData>>(std::move(Selector)); } Stencil transformer::dPrint(StringRef Id) { - return std::make_shared<StencilImpl<DebugPrintNodeData>>(Id); + return std::make_shared<StencilImpl<DebugPrintNodeData>>(std::string(Id)); } Stencil transformer::expression(llvm::StringRef Id) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::Parens, Id); + UnaryNodeOperator::Parens, std::string(Id)); } Stencil transformer::deref(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::Deref, ExprId); + UnaryNodeOperator::Deref, std::string(ExprId)); } Stencil transformer::maybeDeref(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::MaybeDeref, ExprId); + UnaryNodeOperator::MaybeDeref, std::string(ExprId)); } Stencil transformer::addressOf(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::AddressOf, ExprId); + UnaryNodeOperator::AddressOf, std::string(ExprId)); } Stencil 
transformer::maybeAddressOf(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::MaybeAddressOf, ExprId); + UnaryNodeOperator::MaybeAddressOf, std::string(ExprId)); } Stencil transformer::access(StringRef BaseId, Stencil Member) { diff --git a/clang/lib/Tooling/Transformer/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp index 71f0646f4c0eb..e8fc00c4e953f 100644 --- a/clang/lib/Tooling/Transformer/Transformer.cpp +++ b/clang/lib/Tooling/Transformer/Transformer.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/AtomicChange.h" #include "llvm/Support/Error.h" +#include <map> #include <utility> #include <vector> @@ -31,7 +32,7 @@ void Transformer::run(const MatchFinder::MatchResult &Result) { transformer::RewriteRule::Case Case = transformer::detail::findSelectedCase(Result, Rule); - auto Transformations = transformer::detail::translateEdits(Result, Case.Edits); + auto Transformations = Case.Edits(Result); if (!Transformations) { Consumer(Transformations.takeError()); return; @@ -45,28 +46,39 @@ void Transformer::run(const MatchFinder::MatchResult &Result) { return; } - // Record the results in the AtomicChange, anchored at the location of the - // first change. - AtomicChange AC(*Result.SourceManager, - (*Transformations)[0].Range.getBegin()); + // Group the transformations, by file, into AtomicChanges, each anchored by + // the location of the first change in that file. 
+ std::map<FileID, AtomicChange> ChangesByFileID; for (const auto &T : *Transformations) { + auto ID = Result.SourceManager->getFileID(T.Range.getBegin()); + auto Iter = ChangesByFileID + .emplace(ID, AtomicChange(*Result.SourceManager, + T.Range.getBegin(), T.Metadata)) + .first; + auto &AC = Iter->second; if (auto Err = AC.replace(*Result.SourceManager, T.Range, T.Replacement)) { Consumer(std::move(Err)); return; } } - for (const auto &I : Case.AddedIncludes) { - auto &Header = I.first; - switch (I.second) { - case transformer::IncludeFormat::Quoted: - AC.addHeader(Header); - break; - case transformer::IncludeFormat::Angled: - AC.addHeader((llvm::Twine("<") + Header + ">").str()); - break; + for (auto &IDChangePair : ChangesByFileID) { + auto &AC = IDChangePair.second; + // FIXME: this will add includes to *all* changed files, which may not be + // the intent. We should upgrade the representation to allow associating + // headers with specific edits. + for (const auto &I : Case.AddedIncludes) { + auto &Header = I.first; + switch (I.second) { + case transformer::IncludeFormat::Quoted: + AC.addHeader(Header); + break; + case transformer::IncludeFormat::Angled: + AC.addHeader((llvm::Twine("<") + Header + ">").str()); + break; + } } - } - Consumer(std::move(AC)); + Consumer(std::move(AC)); + } } |