summaryrefslogtreecommitdiff
path: root/lib/ASTMatchers/Dynamic/Parser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ASTMatchers/Dynamic/Parser.cpp')
-rw-r--r--lib/ASTMatchers/Dynamic/Parser.cpp683
1 files changed, 0 insertions, 683 deletions
diff --git a/lib/ASTMatchers/Dynamic/Parser.cpp b/lib/ASTMatchers/Dynamic/Parser.cpp
deleted file mode 100644
index e3b00b46832c1..0000000000000
--- a/lib/ASTMatchers/Dynamic/Parser.cpp
+++ /dev/null
@@ -1,683 +0,0 @@
-//===- Parser.cpp - Matcher expression parser -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Recursive parser implementation for the matcher expression grammar.
-///
-//===----------------------------------------------------------------------===//
-
-#include "clang/ASTMatchers/Dynamic/Parser.h"
-#include "clang/ASTMatchers/ASTMatchersInternal.h"
-#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
-#include "clang/ASTMatchers/Dynamic/Registry.h"
-#include "clang/Basic/CharInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include <algorithm>
-#include <cassert>
-#include <cerrno>
-#include <cstddef>
-#include <cstdlib>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace clang {
-namespace ast_matchers {
-namespace dynamic {
-
-/// Simple structure to hold information for one token from the parser.
-struct Parser::TokenInfo {
- /// Different possible tokens.
- enum TokenKind {
- TK_Eof,
- TK_OpenParen,
- TK_CloseParen,
- TK_Comma,
- TK_Period,
- TK_Literal,
- TK_Ident,
- TK_InvalidChar,
- TK_Error,
- TK_CodeCompletion
- };
-
- /// Some known identifiers.
- static const char* const ID_Bind;
-
- TokenInfo() = default;
-
- StringRef Text;
- TokenKind Kind = TK_Eof;
- SourceRange Range;
- VariantValue Value;
-};
-
-const char* const Parser::TokenInfo::ID_Bind = "bind";
-
-/// Simple tokenizer for the parser.
-class Parser::CodeTokenizer {
-public:
- explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
- : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
- NextToken = getNextToken();
- }
-
- CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
- unsigned CodeCompletionOffset)
- : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
- CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
- NextToken = getNextToken();
- }
-
- /// Returns but doesn't consume the next token.
- const TokenInfo &peekNextToken() const { return NextToken; }
-
- /// Consumes and returns the next token.
- TokenInfo consumeNextToken() {
- TokenInfo ThisToken = NextToken;
- NextToken = getNextToken();
- return ThisToken;
- }
-
- TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
-
-private:
- TokenInfo getNextToken() {
- consumeWhitespace();
- TokenInfo Result;
- Result.Range.Start = currentLocation();
-
- if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
- Result.Kind = TokenInfo::TK_CodeCompletion;
- Result.Text = StringRef(CodeCompletionLocation, 0);
- CodeCompletionLocation = nullptr;
- return Result;
- }
-
- if (Code.empty()) {
- Result.Kind = TokenInfo::TK_Eof;
- Result.Text = "";
- return Result;
- }
-
- switch (Code[0]) {
- case '#':
- Result.Kind = TokenInfo::TK_Eof;
- Result.Text = "";
- return Result;
- case ',':
- Result.Kind = TokenInfo::TK_Comma;
- Result.Text = Code.substr(0, 1);
- Code = Code.drop_front();
- break;
- case '.':
- Result.Kind = TokenInfo::TK_Period;
- Result.Text = Code.substr(0, 1);
- Code = Code.drop_front();
- break;
- case '(':
- Result.Kind = TokenInfo::TK_OpenParen;
- Result.Text = Code.substr(0, 1);
- Code = Code.drop_front();
- break;
- case ')':
- Result.Kind = TokenInfo::TK_CloseParen;
- Result.Text = Code.substr(0, 1);
- Code = Code.drop_front();
- break;
-
- case '"':
- case '\'':
- // Parse a string literal.
- consumeStringLiteral(&Result);
- break;
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- // Parse an unsigned and float literal.
- consumeNumberLiteral(&Result);
- break;
-
- default:
- if (isAlphanumeric(Code[0])) {
- // Parse an identifier
- size_t TokenLength = 1;
- while (true) {
- // A code completion location in/immediately after an identifier will
- // cause the portion of the identifier before the code completion
- // location to become a code completion token.
- if (CodeCompletionLocation == Code.data() + TokenLength) {
- CodeCompletionLocation = nullptr;
- Result.Kind = TokenInfo::TK_CodeCompletion;
- Result.Text = Code.substr(0, TokenLength);
- Code = Code.drop_front(TokenLength);
- return Result;
- }
- if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
- break;
- ++TokenLength;
- }
- if (TokenLength == 4 && Code.startswith("true")) {
- Result.Kind = TokenInfo::TK_Literal;
- Result.Value = true;
- } else if (TokenLength == 5 && Code.startswith("false")) {
- Result.Kind = TokenInfo::TK_Literal;
- Result.Value = false;
- } else {
- Result.Kind = TokenInfo::TK_Ident;
- Result.Text = Code.substr(0, TokenLength);
- }
- Code = Code.drop_front(TokenLength);
- } else {
- Result.Kind = TokenInfo::TK_InvalidChar;
- Result.Text = Code.substr(0, 1);
- Code = Code.drop_front(1);
- }
- break;
- }
-
- Result.Range.End = currentLocation();
- return Result;
- }
-
- /// Consume an unsigned and float literal.
- void consumeNumberLiteral(TokenInfo *Result) {
- bool isFloatingLiteral = false;
- unsigned Length = 1;
- if (Code.size() > 1) {
- // Consume the 'x' or 'b' radix modifier, if present.
- switch (toLowercase(Code[1])) {
- case 'x': case 'b': Length = 2;
- }
- }
- while (Length < Code.size() && isHexDigit(Code[Length]))
- ++Length;
-
- // Try to recognize a floating point literal.
- while (Length < Code.size()) {
- char c = Code[Length];
- if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
- isFloatingLiteral = true;
- Length++;
- } else {
- break;
- }
- }
-
- Result->Text = Code.substr(0, Length);
- Code = Code.drop_front(Length);
-
- if (isFloatingLiteral) {
- char *end;
- errno = 0;
- std::string Text = Result->Text.str();
- double doubleValue = strtod(Text.c_str(), &end);
- if (*end == 0 && errno == 0) {
- Result->Kind = TokenInfo::TK_Literal;
- Result->Value = doubleValue;
- return;
- }
- } else {
- unsigned Value;
- if (!Result->Text.getAsInteger(0, Value)) {
- Result->Kind = TokenInfo::TK_Literal;
- Result->Value = Value;
- return;
- }
- }
-
- SourceRange Range;
- Range.Start = Result->Range.Start;
- Range.End = currentLocation();
- Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
- Result->Kind = TokenInfo::TK_Error;
- }
-
- /// Consume a string literal.
- ///
- /// \c Code must be positioned at the start of the literal (the opening
- /// quote). Consumed until it finds the same closing quote character.
- void consumeStringLiteral(TokenInfo *Result) {
- bool InEscape = false;
- const char Marker = Code[0];
- for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
- if (InEscape) {
- InEscape = false;
- continue;
- }
- if (Code[Length] == '\\') {
- InEscape = true;
- continue;
- }
- if (Code[Length] == Marker) {
- Result->Kind = TokenInfo::TK_Literal;
- Result->Text = Code.substr(0, Length + 1);
- Result->Value = Code.substr(1, Length - 1);
- Code = Code.drop_front(Length + 1);
- return;
- }
- }
-
- StringRef ErrorText = Code;
- Code = Code.drop_front(Code.size());
- SourceRange Range;
- Range.Start = Result->Range.Start;
- Range.End = currentLocation();
- Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
- Result->Kind = TokenInfo::TK_Error;
- }
-
- /// Consume all leading whitespace from \c Code.
- void consumeWhitespace() {
- while (!Code.empty() && isWhitespace(Code[0])) {
- if (Code[0] == '\n') {
- ++Line;
- StartOfLine = Code.drop_front();
- }
- Code = Code.drop_front();
- }
- }
-
- SourceLocation currentLocation() {
- SourceLocation Location;
- Location.Line = Line;
- Location.Column = Code.data() - StartOfLine.data() + 1;
- return Location;
- }
-
- StringRef Code;
- StringRef StartOfLine;
- unsigned Line = 1;
- Diagnostics *Error;
- TokenInfo NextToken;
- const char *CodeCompletionLocation = nullptr;
-};
-
-Parser::Sema::~Sema() = default;
-
-std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
- llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
- return {};
-}
-
-std::vector<MatcherCompletion>
-Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
- return {};
-}
-
-struct Parser::ScopedContextEntry {
- Parser *P;
-
- ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
- P->ContextStack.push_back(std::make_pair(C, 0u));
- }
-
- ~ScopedContextEntry() {
- P->ContextStack.pop_back();
- }
-
- void nextArg() {
- ++P->ContextStack.back().second;
- }
-};
-
-/// Parse expressions that start with an identifier.
-///
-/// This function can parse named values and matchers.
-/// In case of failure it will try to determine the user's intent to give
-/// an appropriate error message.
-bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
- const TokenInfo NameToken = Tokenizer->consumeNextToken();
-
- if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
- // Parse as a named value.
- if (const VariantValue NamedValue =
- NamedValues ? NamedValues->lookup(NameToken.Text)
- : VariantValue()) {
-
- if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
- *Value = NamedValue;
- return true;
- }
-
- std::string BindID;
- if (!parseBindID(BindID))
- return false;
-
- assert(NamedValue.isMatcher());
- llvm::Optional<DynTypedMatcher> Result =
- NamedValue.getMatcher().getSingleMatcher();
- if (Result.hasValue()) {
- llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
- if (Bound.hasValue()) {
- *Value = VariantMatcher::SingleMatcher(*Bound);
- return true;
- }
- }
- return false;
- }
- // If the syntax is correct and the name is not a matcher either, report
- // unknown named value.
- if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
- Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
- Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
- !S->lookupMatcherCtor(NameToken.Text)) {
- Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
- << NameToken.Text;
- return false;
- }
- // Otherwise, fallback to the matcher parser.
- }
-
- // Parse as a matcher expression.
- return parseMatcherExpressionImpl(NameToken, Value);
-}
-
-bool Parser::parseBindID(std::string &BindID) {
- // Parse .bind("foo")
- assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period);
- Tokenizer->consumeNextToken(); // consume the period.
- const TokenInfo BindToken = Tokenizer->consumeNextToken();
- if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
- addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
- return false;
- }
-
- const TokenInfo OpenToken = Tokenizer->consumeNextToken();
- const TokenInfo IDToken = Tokenizer->consumeNextToken();
- const TokenInfo CloseToken = Tokenizer->consumeNextToken();
-
- // TODO: We could use different error codes for each/some to be more
- // explicit about the syntax error.
- if (BindToken.Kind != TokenInfo::TK_Ident ||
- BindToken.Text != TokenInfo::ID_Bind) {
- Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
- return false;
- }
- if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
- Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
- return false;
- }
- if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
- Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
- return false;
- }
- if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
- Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
- return false;
- }
- BindID = IDToken.Value.getString();
- return true;
-}
-
-/// Parse and validate a matcher expression.
-/// \return \c true on success, in which case \c Value has the matcher parsed.
-/// If the input is malformed, or some argument has an error, it
-/// returns \c false.
-bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
- VariantValue *Value) {
- assert(NameToken.Kind == TokenInfo::TK_Ident);
- const TokenInfo OpenToken = Tokenizer->consumeNextToken();
- if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
- Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
- << OpenToken.Text;
- return false;
- }
-
- llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
-
- if (!Ctor) {
- Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
- << NameToken.Text;
- // Do not return here. We need to continue to give completion suggestions.
- }
-
- std::vector<ParserValue> Args;
- TokenInfo EndToken;
-
- {
- ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
-
- while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
- if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
- // End of args.
- EndToken = Tokenizer->consumeNextToken();
- break;
- }
- if (!Args.empty()) {
- // We must find a , token to continue.
- const TokenInfo CommaToken = Tokenizer->consumeNextToken();
- if (CommaToken.Kind != TokenInfo::TK_Comma) {
- Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
- << CommaToken.Text;
- return false;
- }
- }
-
- Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
- NameToken.Text, NameToken.Range,
- Args.size() + 1);
- ParserValue ArgValue;
- ArgValue.Text = Tokenizer->peekNextToken().Text;
- ArgValue.Range = Tokenizer->peekNextToken().Range;
- if (!parseExpressionImpl(&ArgValue.Value)) {
- return false;
- }
-
- Args.push_back(ArgValue);
- SCE.nextArg();
- }
- }
-
- if (EndToken.Kind == TokenInfo::TK_Eof) {
- Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
- return false;
- }
-
- std::string BindID;
- if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
- if (!parseBindID(BindID))
- return false;
- }
-
- if (!Ctor)
- return false;
-
- // Merge the start and end infos.
- Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
- NameToken.Text, NameToken.Range);
- SourceRange MatcherRange = NameToken.Range;
- MatcherRange.End = EndToken.Range.End;
- VariantMatcher Result = S->actOnMatcherExpression(
- *Ctor, MatcherRange, BindID, Args, Error);
- if (Result.isNull()) return false;
-
- *Value = Result;
- return true;
-}
-
-// If the prefix of this completion matches the completion token, add it to
-// Completions minus the prefix.
-void Parser::addCompletion(const TokenInfo &CompToken,
- const MatcherCompletion& Completion) {
- if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
- Completion.Specificity > 0) {
- Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
- Completion.MatcherDecl, Completion.Specificity);
- }
-}
-
-std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
- ArrayRef<ArgKind> AcceptedTypes) {
- if (!NamedValues) return std::vector<MatcherCompletion>();
- std::vector<MatcherCompletion> Result;
- for (const auto &Entry : *NamedValues) {
- unsigned Specificity;
- if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
- std::string Decl =
- (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
- Result.emplace_back(Entry.getKey(), Decl, Specificity);
- }
- }
- return Result;
-}
-
-void Parser::addExpressionCompletions() {
- const TokenInfo CompToken = Tokenizer->consumeNextToken();
- assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
-
- // We cannot complete code if there is an invalid element on the context
- // stack.
- for (ContextStackTy::iterator I = ContextStack.begin(),
- E = ContextStack.end();
- I != E; ++I) {
- if (!I->first)
- return;
- }
-
- auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
- for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
- addCompletion(CompToken, Completion);
- }
-
- for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
- addCompletion(CompToken, Completion);
- }
-}
-
-/// Parse an <Expression>
-bool Parser::parseExpressionImpl(VariantValue *Value) {
- switch (Tokenizer->nextTokenKind()) {
- case TokenInfo::TK_Literal:
- *Value = Tokenizer->consumeNextToken().Value;
- return true;
-
- case TokenInfo::TK_Ident:
- return parseIdentifierPrefixImpl(Value);
-
- case TokenInfo::TK_CodeCompletion:
- addExpressionCompletions();
- return false;
-
- case TokenInfo::TK_Eof:
- Error->addError(Tokenizer->consumeNextToken().Range,
- Error->ET_ParserNoCode);
- return false;
-
- case TokenInfo::TK_Error:
- // This error was already reported by the tokenizer.
- return false;
-
- case TokenInfo::TK_OpenParen:
- case TokenInfo::TK_CloseParen:
- case TokenInfo::TK_Comma:
- case TokenInfo::TK_Period:
- case TokenInfo::TK_InvalidChar:
- const TokenInfo Token = Tokenizer->consumeNextToken();
- Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
- return false;
- }
-
- llvm_unreachable("Unknown token kind.");
-}
-
-static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
-
-Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
- const NamedValueMap *NamedValues, Diagnostics *Error)
- : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
- NamedValues(NamedValues), Error(Error) {}
-
-Parser::RegistrySema::~RegistrySema() = default;
-
-llvm::Optional<MatcherCtor>
-Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
- return Registry::lookupMatcherCtor(MatcherName);
-}
-
-VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
- MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
- ArrayRef<ParserValue> Args, Diagnostics *Error) {
- if (BindID.empty()) {
- return Registry::constructMatcher(Ctor, NameRange, Args, Error);
- } else {
- return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
- Error);
- }
-}
-
-std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
- ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
- return Registry::getAcceptedCompletionTypes(Context);
-}
-
-std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
- ArrayRef<ArgKind> AcceptedTypes) {
- return Registry::getMatcherCompletions(AcceptedTypes);
-}
-
-bool Parser::parseExpression(StringRef Code, Sema *S,
- const NamedValueMap *NamedValues,
- VariantValue *Value, Diagnostics *Error) {
- CodeTokenizer Tokenizer(Code, Error);
- if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
- return false;
- if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
- Error->addError(Tokenizer.peekNextToken().Range,
- Error->ET_ParserTrailingCode);
- return false;
- }
- return true;
-}
-
-std::vector<MatcherCompletion>
-Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
- const NamedValueMap *NamedValues) {
- Diagnostics Error;
- CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
- Parser P(&Tokenizer, S, NamedValues, &Error);
- VariantValue Dummy;
- P.parseExpressionImpl(&Dummy);
-
- // Sort by specificity, then by name.
- llvm::sort(P.Completions,
- [](const MatcherCompletion &A, const MatcherCompletion &B) {
- if (A.Specificity != B.Specificity)
- return A.Specificity > B.Specificity;
- return A.TypedText < B.TypedText;
- });
-
- return P.Completions;
-}
-
-llvm::Optional<DynTypedMatcher>
-Parser::parseMatcherExpression(StringRef Code, Sema *S,
- const NamedValueMap *NamedValues,
- Diagnostics *Error) {
- VariantValue Value;
- if (!parseExpression(Code, S, NamedValues, &Value, Error))
- return llvm::Optional<DynTypedMatcher>();
- if (!Value.isMatcher()) {
- Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
- return llvm::Optional<DynTypedMatcher>();
- }
- llvm::Optional<DynTypedMatcher> Result =
- Value.getMatcher().getSingleMatcher();
- if (!Result.hasValue()) {
- Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
- << Value.getTypeAsString();
- }
- return Result;
-}
-
-} // namespace dynamic
-} // namespace ast_matchers
-} // namespace clang