summaryrefslogtreecommitdiff
path: root/clang/lib/Lex
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Lex')
-rw-r--r--clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp955
-rw-r--r--clang/lib/Lex/HeaderMap.cpp242
-rw-r--r--clang/lib/Lex/HeaderSearch.cpp1801
-rw-r--r--clang/lib/Lex/Lexer.cpp3951
-rw-r--r--clang/lib/Lex/LiteralSupport.cpp1896
-rw-r--r--clang/lib/Lex/MacroArgs.cpp307
-rw-r--r--clang/lib/Lex/MacroInfo.cpp248
-rw-r--r--clang/lib/Lex/ModuleMap.cpp3010
-rw-r--r--clang/lib/Lex/PPCaching.cpp163
-rw-r--r--clang/lib/Lex/PPCallbacks.cpp13
-rw-r--r--clang/lib/Lex/PPConditionalDirectiveRecord.cpp119
-rw-r--r--clang/lib/Lex/PPDirectives.cpp3084
-rw-r--r--clang/lib/Lex/PPExpressions.cpp899
-rw-r--r--clang/lib/Lex/PPLexerChange.cpp834
-rw-r--r--clang/lib/Lex/PPMacroExpansion.cpp1845
-rw-r--r--clang/lib/Lex/Pragma.cpp1914
-rw-r--r--clang/lib/Lex/PreprocessingRecord.cpp516
-rw-r--r--clang/lib/Lex/Preprocessor.cpp1401
-rw-r--r--clang/lib/Lex/PreprocessorLexer.cpp52
-rw-r--r--clang/lib/Lex/ScratchBuffer.cpp83
-rw-r--r--clang/lib/Lex/TokenConcatenation.cpp297
-rw-r--r--clang/lib/Lex/TokenLexer.cpp1079
-rw-r--r--clang/lib/Lex/UnicodeCharSets.h407
23 files changed, 25116 insertions, 0 deletions
diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
new file mode 100644
index 000000000000..f063ed711c44
--- /dev/null
+++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -0,0 +1,955 @@
+//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the implementation for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace {
+
+struct Minimizer {
+ /// Minimized output.
+ SmallVectorImpl<char> &Out;
+ /// The known tokens encountered during the minimization.
+ SmallVectorImpl<Token> &Tokens;
+
+ Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
+ StringRef Input, DiagnosticsEngine *Diags,
+ SourceLocation InputSourceLoc)
+ : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
+ InputSourceLoc(InputSourceLoc) {}
+
+ /// Lex the provided source and emit the minimized output.
+ ///
+ /// \returns True on error.
+ bool minimize();
+
+private:
+ struct IdInfo {
+ const char *Last;
+ StringRef Name;
+ };
+
+ /// Lex an identifier.
+ ///
+ /// \pre First points at a valid identifier head.
+ LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
+ LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
+ const char *const End);
+ LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
+ LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
+ const char *&First, const char *const End);
+ Token &makeToken(TokenKind K) {
+ Tokens.emplace_back(K, Out.size());
+ return Tokens.back();
+ }
+ void popToken() {
+ Out.resize(Tokens.back().Offset);
+ Tokens.pop_back();
+ }
+ TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
+
+ Minimizer &put(char Byte) {
+ Out.push_back(Byte);
+ return *this;
+ }
+ Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
+ Minimizer &append(const char *First, const char *Last) {
+ Out.append(First, Last);
+ return *this;
+ }
+
+ void printToNewline(const char *&First, const char *const End);
+ void printAdjacentModuleNameParts(const char *&First, const char *const End);
+ LLVM_NODISCARD bool printAtImportBody(const char *&First,
+ const char *const End);
+ void printDirectiveBody(const char *&First, const char *const End);
+ void printAdjacentMacroArgs(const char *&First, const char *const End);
+ LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
+
+ /// Reports a diagnostic if the diagnostic engine is provided. Always returns
+ /// true at the end.
+ bool reportError(const char *CurPtr, unsigned Err);
+
+ StringMap<char> SplitIds;
+ StringRef Input;
+ DiagnosticsEngine *Diags;
+ SourceLocation InputSourceLoc;
+};
+
+} // end anonymous namespace
+
+bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
+ if (!Diags)
+ return true;
+ assert(CurPtr >= Input.data() && "invalid buffer ptr");
+ Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
+ return true;
+}
+
+static void skipOverSpaces(const char *&First, const char *const End) {
+ while (First != End && isHorizontalWhitespace(*First))
+ ++First;
+}
+
+LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
+ const char *Current) {
+ assert(First <= Current);
+
+ // Check if we can even back up.
+ if (*Current != '"' || First == Current)
+ return false;
+
+ // Check for an "R".
+ --Current;
+ if (*Current != 'R')
+ return false;
+ if (First == Current || !isIdentifierBody(*--Current))
+ return true;
+
+ // Check for a prefix of "u", "U", or "L".
+ if (*Current == 'u' || *Current == 'U' || *Current == 'L')
+ return First == Current || !isIdentifierBody(*--Current);
+
+ // Check for a prefix of "u8".
+ if (*Current != '8' || First == Current || *Current-- != 'u')
+ return false;
+ return First == Current || !isIdentifierBody(*--Current);
+}
+
+static void skipRawString(const char *&First, const char *const End) {
+ assert(First[0] == '"');
+ assert(First[-1] == 'R');
+
+ const char *Last = ++First;
+ while (Last != End && *Last != '(')
+ ++Last;
+ if (Last == End) {
+ First = Last; // Hit the end... just give up.
+ return;
+ }
+
+ StringRef Terminator(First, Last - First);
+ for (;;) {
+ // Move First to just past the next ")".
+ First = Last;
+ while (First != End && *First != ')')
+ ++First;
+ if (First == End)
+ return;
+ ++First;
+
+ // Look ahead for the terminator sequence.
+ Last = First;
+ while (Last != End && size_t(Last - First) < Terminator.size() &&
+ Terminator[Last - First] == *Last)
+ ++Last;
+
+ // Check if we hit it (or the end of the file).
+ if (Last == End) {
+ First = Last;
+ return;
+ }
+ if (size_t(Last - First) < Terminator.size())
+ continue;
+ if (*Last != '"')
+ continue;
+ First = Last + 1;
+ return;
+ }
+}
+
+// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
+static unsigned isEOL(const char *First, const char *const End) {
+ if (First == End)
+ return 0;
+ if (End - First > 1 && isVerticalWhitespace(First[0]) &&
+ isVerticalWhitespace(First[1]) && First[0] != First[1])
+ return 2;
+ return !!isVerticalWhitespace(First[0]);
+}
+
+static void skipString(const char *&First, const char *const End) {
+ assert(*First == '\'' || *First == '"' || *First == '<');
+ const char Terminator = *First == '<' ? '>' : *First;
+ for (++First; First != End && *First != Terminator; ++First) {
+ // String and character literals don't extend past the end of the line.
+ if (isVerticalWhitespace(*First))
+ return;
+ if (*First != '\\')
+ continue;
+ // Skip past backslash to the next character. This ensures that the
+ // character right after it is skipped as well, which matters if it's
+ // the terminator.
+ if (++First == End)
+ return;
+ if (!isWhitespace(*First))
+ continue;
+ // Whitespace after the backslash might indicate a line continuation.
+ const char *FirstAfterBackslashPastSpace = First;
+ skipOverSpaces(FirstAfterBackslashPastSpace, End);
+ if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
+ // Advance the character pointer to the next line for the next
+ // iteration.
+ First = FirstAfterBackslashPastSpace + NLSize - 1;
+ }
+ }
+ if (First != End)
+ ++First; // Finish off the string.
+}
+
+// Returns the length of the skipped newline
+static unsigned skipNewline(const char *&First, const char *End) {
+ if (First == End)
+ return 0;
+ assert(isVerticalWhitespace(*First));
+ unsigned Len = isEOL(First, End);
+ assert(Len && "expected newline");
+ First += Len;
+ return Len;
+}
+
+static bool wasLineContinuation(const char *First, unsigned EOLLen) {
+ return *(First - (int)EOLLen - 1) == '\\';
+}
+
+static void skipToNewlineRaw(const char *&First, const char *const End) {
+ for (;;) {
+ if (First == End)
+ return;
+
+ unsigned Len = isEOL(First, End);
+ if (Len)
+ return;
+
+ do {
+ if (++First == End)
+ return;
+ Len = isEOL(First, End);
+ } while (!Len);
+
+ if (First[-1] != '\\')
+ return;
+
+ First += Len;
+ // Keep skipping lines...
+ }
+}
+
+static const char *findLastNonSpace(const char *First, const char *Last) {
+ assert(First <= Last);
+ while (First != Last && isHorizontalWhitespace(Last[-1]))
+ --Last;
+ return Last;
+}
+
+static const char *findFirstTrailingSpace(const char *First,
+ const char *Last) {
+ const char *LastNonSpace = findLastNonSpace(First, Last);
+ if (Last == LastNonSpace)
+ return Last;
+ assert(isHorizontalWhitespace(LastNonSpace[0]));
+ return LastNonSpace + 1;
+}
+
+static void skipLineComment(const char *&First, const char *const End) {
+ assert(First[0] == '/' && First[1] == '/');
+ First += 2;
+ skipToNewlineRaw(First, End);
+}
+
+static void skipBlockComment(const char *&First, const char *const End) {
+ assert(First[0] == '/' && First[1] == '*');
+ if (End - First < 4) {
+ First = End;
+ return;
+ }
+ for (First += 3; First != End; ++First)
+ if (First[-1] == '*' && First[0] == '/') {
+ ++First;
+ return;
+ }
+}
+
+/// \returns True if the current single quotation mark character is a C++ 14
+/// digit separator.
+static bool isQuoteCppDigitSeparator(const char *const Start,
+ const char *const Cur,
+ const char *const End) {
+ assert(*Cur == '\'' && "expected quotation character");
+ // skipLine called in places where we don't expect a valid number
+ // body before `start` on the same line, so always return false at the start.
+ if (Start == Cur)
+ return false;
+ // The previous character must be a valid PP number character.
+ // Make sure that the L, u, U, u8 prefixes don't get marked as a
+ // separator though.
+ char Prev = *(Cur - 1);
+ if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+ return false;
+ if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+ return false;
+ if (!isPreprocessingNumberBody(Prev))
+ return false;
+ // The next character should be a valid identifier body character.
+ return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
+}
+
+static void skipLine(const char *&First, const char *const End) {
+ for (;;) {
+ assert(First <= End);
+ if (First == End)
+ return;
+
+ if (isVerticalWhitespace(*First)) {
+ skipNewline(First, End);
+ return;
+ }
+ const char *Start = First;
+ while (First != End && !isVerticalWhitespace(*First)) {
+ // Iterate over strings correctly to avoid comments and newlines.
+ if (*First == '"' ||
+ (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+ if (isRawStringLiteral(Start, First))
+ skipRawString(First, End);
+ else
+ skipString(First, End);
+ continue;
+ }
+
+ // Iterate over comments correctly.
+ if (*First != '/' || End - First < 2) {
+ ++First;
+ continue;
+ }
+
+ if (First[1] == '/') {
+ // "//...".
+ skipLineComment(First, End);
+ continue;
+ }
+
+ if (First[1] != '*') {
+ ++First;
+ continue;
+ }
+
+ // "/*...*/".
+ skipBlockComment(First, End);
+ }
+ if (First == End)
+ return;
+
+ // Skip over the newline.
+ unsigned Len = skipNewline(First, End);
+ if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
+ break;
+ }
+}
+
+static void skipDirective(StringRef Name, const char *&First,
+ const char *const End) {
+ if (llvm::StringSwitch<bool>(Name)
+ .Case("warning", true)
+ .Case("error", true)
+ .Default(false))
+ // Do not process quotes or comments.
+ skipToNewlineRaw(First, End);
+ else
+ skipLine(First, End);
+}
+
+void Minimizer::printToNewline(const char *&First, const char *const End) {
+ while (First != End && !isVerticalWhitespace(*First)) {
+ const char *Last = First;
+ do {
+ // Iterate over strings correctly to avoid comments and newlines.
+ if (*Last == '"' || *Last == '\'' ||
+ (*Last == '<' && top() == pp_include)) {
+ if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
+ skipRawString(Last, End);
+ else
+ skipString(Last, End);
+ continue;
+ }
+ if (*Last != '/' || End - Last < 2) {
+ ++Last;
+ continue; // Gather the rest up to print verbatim.
+ }
+
+ if (Last[1] != '/' && Last[1] != '*') {
+ ++Last;
+ continue;
+ }
+
+ // Deal with "//..." and "/*...*/".
+ append(First, findFirstTrailingSpace(First, Last));
+ First = Last;
+
+ if (Last[1] == '/') {
+ skipLineComment(First, End);
+ return;
+ }
+
+ put(' ');
+ skipBlockComment(First, End);
+ skipOverSpaces(First, End);
+ Last = First;
+ } while (Last != End && !isVerticalWhitespace(*Last));
+
+ // Print out the string.
+ const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
+ if (Last == End || LastBeforeTrailingSpace == First ||
+ LastBeforeTrailingSpace[-1] != '\\') {
+ append(First, LastBeforeTrailingSpace);
+ First = Last;
+ skipNewline(First, End);
+ return;
+ }
+
+ // Print up to the backslash, backing up over spaces. Preserve at least one
+ // space, as the space matters when tokens are separated by a line
+ // continuation.
+ append(First, findFirstTrailingSpace(
+ First, LastBeforeTrailingSpace - 1));
+
+ First = Last;
+ skipNewline(First, End);
+ skipOverSpaces(First, End);
+ }
+}
+
+static void skipWhitespace(const char *&First, const char *const End) {
+ for (;;) {
+ assert(First <= End);
+ skipOverSpaces(First, End);
+
+ if (End - First < 2)
+ return;
+
+ if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
+ skipNewline(++First, End);
+ continue;
+ }
+
+ // Check for a non-comment character.
+ if (First[0] != '/')
+ return;
+
+ // "// ...".
+ if (First[1] == '/') {
+ skipLineComment(First, End);
+ return;
+ }
+
+ // Cannot be a comment.
+ if (First[1] != '*')
+ return;
+
+ // "/*...*/".
+ skipBlockComment(First, End);
+ }
+}
+
+void Minimizer::printAdjacentModuleNameParts(const char *&First,
+ const char *const End) {
+ // Skip over parts of the body.
+ const char *Last = First;
+ do
+ ++Last;
+ while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
+ append(First, Last);
+ First = Last;
+}
+
+bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
+ for (;;) {
+ skipWhitespace(First, End);
+ if (First == End)
+ return true;
+
+ if (isVerticalWhitespace(*First)) {
+ skipNewline(First, End);
+ continue;
+ }
+
+ // Found a semicolon.
+ if (*First == ';') {
+ put(*First++).put('\n');
+ return false;
+ }
+
+ // Don't handle macro expansions inside @import for now.
+ if (!isIdentifierBody(*First) && *First != '.')
+ return true;
+
+ printAdjacentModuleNameParts(First, End);
+ }
+}
+
+void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
+ skipWhitespace(First, End); // Skip initial whitespace.
+ printToNewline(First, End);
+ while (Out.back() == ' ')
+ Out.pop_back();
+ put('\n');
+}
+
+LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
+ const char *const End) {
+ assert(isIdentifierBody(*First) && "invalid identifer");
+ const char *Last = First + 1;
+ while (Last != End && isIdentifierBody(*Last))
+ ++Last;
+ return Last;
+}
+
+LLVM_NODISCARD static const char *
+getIdentifierContinuation(const char *First, const char *const End) {
+ if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
+ return nullptr;
+
+ ++First;
+ skipNewline(First, End);
+ if (First == End)
+ return nullptr;
+ return isIdentifierBody(First[0]) ? First : nullptr;
+}
+
+Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
+ const char *const End) {
+ const char *Last = lexRawIdentifier(First, End);
+ const char *Next = getIdentifierContinuation(Last, End);
+ if (LLVM_LIKELY(!Next))
+ return IdInfo{Last, StringRef(First, Last - First)};
+
+ // Slow path, where identifiers are split over lines.
+ SmallVector<char, 64> Id(First, Last);
+ while (Next) {
+ Last = lexRawIdentifier(Next, End);
+ Id.append(Next, Last);
+ Next = getIdentifierContinuation(Last, End);
+ }
+ return IdInfo{
+ Last,
+ SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+}
+
+void Minimizer::printAdjacentMacroArgs(const char *&First,
+ const char *const End) {
+ // Skip over parts of the body.
+ const char *Last = First;
+ do
+ ++Last;
+ while (Last != End &&
+ (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
+ append(First, Last);
+ First = Last;
+}
+
+bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
+ assert(*First == '(');
+ put(*First++);
+ for (;;) {
+ skipWhitespace(First, End);
+ if (First == End)
+ return true;
+
+ if (*First == ')') {
+ put(*First++);
+ return false;
+ }
+
+ // This is intentionally fairly liberal.
+ if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+ return true;
+
+ printAdjacentMacroArgs(First, End);
+ }
+}
+
+/// Looks for an identifier starting from Last.
+///
+/// Updates "First" to just past the next identifier, if any. Returns true iff
+/// the identifier matches "Id".
+bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
+ const char *const End) {
+ skipWhitespace(First, End);
+ if (First == End || !isIdentifierHead(*First))
+ return false;
+
+ IdInfo FoundId = lexIdentifier(First, End);
+ First = FoundId.Last;
+ return FoundId.Name == Id;
+}
+
+bool Minimizer::lexAt(const char *&First, const char *const End) {
+ // Handle "@import".
+ const char *ImportLoc = First++;
+ if (!isNextIdentifier("import", First, End)) {
+ skipLine(First, End);
+ return false;
+ }
+ makeToken(decl_at_import);
+ append("@import ");
+ if (printAtImportBody(First, End))
+ return reportError(
+ ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
+ skipWhitespace(First, End);
+ if (First == End)
+ return false;
+ if (!isVerticalWhitespace(*First))
+ return reportError(
+ ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
+ skipNewline(First, End);
+ return false;
+}
+
+bool Minimizer::lexModule(const char *&First, const char *const End) {
+ IdInfo Id = lexIdentifier(First, End);
+ First = Id.Last;
+ bool Export = false;
+ if (Id.Name == "export") {
+ Export = true;
+ skipWhitespace(First, End);
+ if (!isIdentifierBody(*First)) {
+ skipLine(First, End);
+ return false;
+ }
+ Id = lexIdentifier(First, End);
+ First = Id.Last;
+ }
+
+ if (Id.Name != "module" && Id.Name != "import") {
+ skipLine(First, End);
+ return false;
+ }
+
+ skipWhitespace(First, End);
+
+ // Ignore this as a module directive if the next character can't be part of
+ // an import.
+
+ switch (*First) {
+ case ':':
+ case '<':
+ case '"':
+ break;
+ default:
+ if (!isIdentifierBody(*First)) {
+ skipLine(First, End);
+ return false;
+ }
+ }
+
+ if (Export) {
+ makeToken(cxx_export_decl);
+ append("export ");
+ }
+
+ if (Id.Name == "module")
+ makeToken(cxx_module_decl);
+ else
+ makeToken(cxx_import_decl);
+ append(Id.Name);
+ append(" ");
+ printToNewline(First, End);
+ append("\n");
+ return false;
+}
+
+bool Minimizer::lexDefine(const char *&First, const char *const End) {
+ makeToken(pp_define);
+ append("#define ");
+ skipWhitespace(First, End);
+
+ if (!isIdentifierHead(*First))
+ return reportError(First, diag::err_pp_macro_not_identifier);
+
+ IdInfo Id = lexIdentifier(First, End);
+ const char *Last = Id.Last;
+ append(Id.Name);
+ if (Last == End)
+ return false;
+ if (*Last == '(') {
+ size_t Size = Out.size();
+ if (printMacroArgs(Last, End)) {
+ // Be robust to bad macro arguments, since they can show up in disabled
+ // code.
+ Out.resize(Size);
+ append("(/* invalid */\n");
+ skipLine(Last, End);
+ return false;
+ }
+ }
+ skipWhitespace(Last, End);
+ if (Last == End)
+ return false;
+ if (!isVerticalWhitespace(*Last))
+ put(' ');
+ printDirectiveBody(Last, End);
+ First = Last;
+ return false;
+}
+
+bool Minimizer::lexPragma(const char *&First, const char *const End) {
+ // #pragma.
+ skipWhitespace(First, End);
+ if (First == End || !isIdentifierHead(*First))
+ return false;
+
+ IdInfo FoundId = lexIdentifier(First, End);
+ First = FoundId.Last;
+ if (FoundId.Name == "once") {
+ // #pragma once
+ skipLine(First, End);
+ makeToken(pp_pragma_once);
+ append("#pragma once\n");
+ return false;
+ }
+
+ if (FoundId.Name != "clang") {
+ skipLine(First, End);
+ return false;
+ }
+
+ // #pragma clang.
+ if (!isNextIdentifier("module", First, End)) {
+ skipLine(First, End);
+ return false;
+ }
+
+ // #pragma clang module.
+ if (!isNextIdentifier("import", First, End)) {
+ skipLine(First, End);
+ return false;
+ }
+
+ // #pragma clang module import.
+ makeToken(pp_pragma_import);
+ append("#pragma clang module import ");
+ printDirectiveBody(First, End);
+ return false;
+}
+
+bool Minimizer::lexEndif(const char *&First, const char *const End) {
+ // Strip out "#else" if it's empty.
+ if (top() == pp_else)
+ popToken();
+
+ // Strip out "#elif" if they're empty.
+ while (top() == pp_elif)
+ popToken();
+
+ // If "#if" is empty, strip it and skip the "#endif".
+ if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+ popToken();
+ skipLine(First, End);
+ return false;
+ }
+
+ return lexDefault(pp_endif, "endif", First, End);
+}
+
+bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
+ const char *&First, const char *const End) {
+ makeToken(Kind);
+ put('#').append(Directive).put(' ');
+ printDirectiveBody(First, End);
+ return false;
+}
+
+static bool isStartOfRelevantLine(char First) {
+ switch (First) {
+ case '#':
+ case '@':
+ case 'i':
+ case 'e':
+ case 'm':
+ return true;
+ }
+ return false;
+}
+
+bool Minimizer::lexPPLine(const char *&First, const char *const End) {
+ assert(First != End);
+
+ skipWhitespace(First, End);
+ assert(First <= End);
+ if (First == End)
+ return false;
+
+ if (!isStartOfRelevantLine(*First)) {
+ skipLine(First, End);
+ assert(First <= End);
+ return false;
+ }
+
+ // Handle "@import".
+ if (*First == '@')
+ return lexAt(First, End);
+
+ if (*First == 'i' || *First == 'e' || *First == 'm')
+ return lexModule(First, End);
+
+ // Handle preprocessing directives.
+ ++First; // Skip over '#'.
+ skipWhitespace(First, End);
+
+ if (First == End)
+ return reportError(First, diag::err_pp_expected_eol);
+
+ if (!isIdentifierHead(*First)) {
+ skipLine(First, End);
+ return false;
+ }
+
+ // Figure out the token.
+ IdInfo Id = lexIdentifier(First, End);
+ First = Id.Last;
+ auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
+ .Case("include", pp_include)
+ .Case("__include_macros", pp___include_macros)
+ .Case("define", pp_define)
+ .Case("undef", pp_undef)
+ .Case("import", pp_import)
+ .Case("include_next", pp_include_next)
+ .Case("if", pp_if)
+ .Case("ifdef", pp_ifdef)
+ .Case("ifndef", pp_ifndef)
+ .Case("elif", pp_elif)
+ .Case("else", pp_else)
+ .Case("endif", pp_endif)
+ .Case("pragma", pp_pragma_import)
+ .Default(pp_none);
+ if (Kind == pp_none) {
+ skipDirective(Id.Name, First, End);
+ return false;
+ }
+
+ if (Kind == pp_endif)
+ return lexEndif(First, End);
+
+ if (Kind == pp_define)
+ return lexDefine(First, End);
+
+ if (Kind == pp_pragma_import)
+ return lexPragma(First, End);
+
+ // Everything else.
+ return lexDefault(Kind, Id.Name, First, End);
+}
+
+static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
+ if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
+ First[2] == '\xbf')
+ First += 3;
+}
+
+bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+ skipUTF8ByteOrderMark(First, End);
+ while (First != End)
+ if (lexPPLine(First, End))
+ return true;
+ return false;
+}
+
+bool Minimizer::minimize() {
+ bool Error = minimizeImpl(Input.begin(), Input.end());
+
+ if (!Error) {
+ // Add a trailing newline and an EOF on success.
+ if (!Out.empty() && Out.back() != '\n')
+ Out.push_back('\n');
+ makeToken(pp_eof);
+ }
+
+ // Null-terminate the output. This way the memory buffer that's passed to
+ // Clang will not have to worry about the terminating '\0'.
+ Out.push_back(0);
+ Out.pop_back();
+ return Error;
+}
+
+bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
+ ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
+ struct Directive {
+ enum DirectiveKind {
+ If, // if/ifdef/ifndef
+ Else // elif,else
+ };
+ int Offset;
+ DirectiveKind Kind;
+ };
+ llvm::SmallVector<Directive, 32> Offsets;
+ for (const Token &T : Input) {
+ switch (T.K) {
+ case pp_if:
+ case pp_ifdef:
+ case pp_ifndef:
+ Offsets.push_back({T.Offset, Directive::If});
+ break;
+
+ case pp_elif:
+ case pp_else: {
+ if (Offsets.empty())
+ return true;
+ int PreviousOffset = Offsets.back().Offset;
+ Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
+ Offsets.push_back({T.Offset, Directive::Else});
+ break;
+ }
+
+ case pp_endif: {
+ if (Offsets.empty())
+ return true;
+ int PreviousOffset = Offsets.back().Offset;
+ Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
+ do {
+ Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
+ if (Kind == Directive::If)
+ break;
+ } while (!Offsets.empty());
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+bool clang::minimizeSourceToDependencyDirectives(
+ StringRef Input, SmallVectorImpl<char> &Output,
+ SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
+ SourceLocation InputSourceLoc) {
+ Output.clear();
+ Tokens.clear();
+ return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
+}
diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp
new file mode 100644
index 000000000000..d44ef29c05d1
--- /dev/null
+++ b/clang/lib/Lex/HeaderMap.cpp
@@ -0,0 +1,242 @@
+//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HeaderMap interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderMapTypes.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/Debug.h"
+#include <cstring>
+#include <memory>
+using namespace clang;
+
+/// HashHMapKey - This is the 'well known' hash function required by the file
+/// format, used to look up keys in the hash table. The hash table uses simple
+/// linear probing based on this function.
+static inline unsigned HashHMapKey(StringRef Str) {
+ unsigned Result = 0;
+ const char *S = Str.begin(), *End = Str.end();
+
+ for (; S != End; S++)
+ Result += toLowercase(*S) * 13;
+ return Result;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Verification and Construction
+//===----------------------------------------------------------------------===//
+
+/// HeaderMap::Create - This attempts to load the specified file as a header
+/// map. If it doesn't look like a HeaderMap, it gives up and returns null.
+/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
+/// into the string error argument and returns null.
+std::unique_ptr<HeaderMap> HeaderMap::Create(const FileEntry *FE,
+ FileManager &FM) {
+ // If the file is too small to be a header map, ignore it.
+ unsigned FileSize = FE->getSize();
+ if (FileSize <= sizeof(HMapHeader)) return nullptr;
+
+ auto FileBuffer = FM.getBufferForFile(FE);
+ if (!FileBuffer || !*FileBuffer)
+ return nullptr;
+ bool NeedsByteSwap;
+ if (!checkHeader(**FileBuffer, NeedsByteSwap))
+ return nullptr;
+ return std::unique_ptr<HeaderMap>(new HeaderMap(std::move(*FileBuffer), NeedsByteSwap));
+}
+
+bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File,
+ bool &NeedsByteSwap) {
+ if (File.getBufferSize() <= sizeof(HMapHeader))
+ return false;
+ const char *FileStart = File.getBufferStart();
+
+ // We know the file is at least as big as the header, check it now.
+ const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
+
+ // Sniff it to see if it's a headermap by checking the magic number and
+ // version.
+ if (Header->Magic == HMAP_HeaderMagicNumber &&
+ Header->Version == HMAP_HeaderVersion)
+ NeedsByteSwap = false;
+ else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
+ Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
+ NeedsByteSwap = true; // Mixed endianness headermap.
+ else
+ return false; // Not a header map.
+
+ if (Header->Reserved != 0)
+ return false;
+
+ // Check the number of buckets. It should be a power of two, and there
+ // should be enough space in the file for all of them.
+ uint32_t NumBuckets = NeedsByteSwap
+ ? llvm::sys::getSwappedBytes(Header->NumBuckets)
+ : Header->NumBuckets;
+ if (!llvm::isPowerOf2_32(NumBuckets))
+ return false;
+ if (File.getBufferSize() <
+ sizeof(HMapHeader) + sizeof(HMapBucket) * NumBuckets)
+ return false;
+
+ // Okay, everything looks good.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility Methods
+//===----------------------------------------------------------------------===//
+
+
+/// getFileName - Return the filename of the headermap.
+StringRef HeaderMapImpl::getFileName() const {
+ return FileBuffer->getBufferIdentifier();
+}
+
+unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const {
+ if (!NeedsBSwap) return X;
+ return llvm::ByteSwap_32(X);
+}
+
+/// getHeader - Return a reference to the file header, in unbyte-swapped form.
+/// This method cannot fail.
+const HMapHeader &HeaderMapImpl::getHeader() const {
+ // We know the file is at least as big as the header. Return it.
+ return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
+}
+
+/// getBucket - Return the specified hash table bucket from the header map,
+/// bswap'ing its fields as appropriate. If the bucket number is not valid,
+/// this return a bucket with an empty key (0).
+HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {
+ assert(FileBuffer->getBufferSize() >=
+ sizeof(HMapHeader) + sizeof(HMapBucket) * BucketNo &&
+ "Expected bucket to be in range");
+
+ HMapBucket Result;
+ Result.Key = HMAP_EmptyBucketKey;
+
+ const HMapBucket *BucketArray =
+ reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
+ sizeof(HMapHeader));
+ const HMapBucket *BucketPtr = BucketArray+BucketNo;
+
+ // Load the values, bswapping as needed.
+ Result.Key = getEndianAdjustedWord(BucketPtr->Key);
+ Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
+ Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
+ return Result;
+}
+
+Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {
+ // Add the start of the string table to the idx.
+ StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
+
+ // Check for invalid index.
+ if (StrTabIdx >= FileBuffer->getBufferSize())
+ return None;
+
+ const char *Data = FileBuffer->getBufferStart() + StrTabIdx;
+ unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx;
+ unsigned Len = strnlen(Data, MaxLen);
+
+ // Check whether the buffer is null-terminated.
+ if (Len == MaxLen && Data[Len - 1])
+ return None;
+
+ return StringRef(Data, Len);
+}
+
+//===----------------------------------------------------------------------===//
+// The Main Drivers
+//===----------------------------------------------------------------------===//
+
+/// dump - Print the contents of this headermap to stderr.
+LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
+ const HMapHeader &Hdr = getHeader();
+ unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+ llvm::dbgs() << "Header Map " << getFileName() << ":\n " << NumBuckets
+ << ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n";
+
+ auto getStringOrInvalid = [this](unsigned Id) -> StringRef {
+ if (Optional<StringRef> S = getString(Id))
+ return *S;
+ return "<invalid>";
+ };
+
+ for (unsigned i = 0; i != NumBuckets; ++i) {
+ HMapBucket B = getBucket(i);
+ if (B.Key == HMAP_EmptyBucketKey) continue;
+
+ StringRef Key = getStringOrInvalid(B.Key);
+ StringRef Prefix = getStringOrInvalid(B.Prefix);
+ StringRef Suffix = getStringOrInvalid(B.Suffix);
+ llvm::dbgs() << " " << i << ". " << Key << " -> '" << Prefix << "' '"
+ << Suffix << "'\n";
+ }
+}
+
+/// LookupFile - Check to see if the specified relative filename is located in
+/// this HeaderMap. If so, open it and return its FileEntry.
+Optional<FileEntryRef> HeaderMap::LookupFile(StringRef Filename,
+ FileManager &FM) const {
+
+ SmallString<1024> Path;
+ StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path);
+ if (Dest.empty())
+ return None;
+
+ return FM.getOptionalFileRef(Dest);
+}
+
+StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
+ SmallVectorImpl<char> &DestPath) const {
+ const HMapHeader &Hdr = getHeader();
+ unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+ // Don't probe infinitely. This should be checked before constructing.
+ assert(llvm::isPowerOf2_32(NumBuckets) && "Expected power of 2");
+
+ // Linearly probe the hash table.
+ for (unsigned Bucket = HashHMapKey(Filename);; ++Bucket) {
+ HMapBucket B = getBucket(Bucket & (NumBuckets-1));
+ if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss.
+
+ // See if the key matches. If not, probe on.
+ Optional<StringRef> Key = getString(B.Key);
+ if (LLVM_UNLIKELY(!Key))
+ continue;
+ if (!Filename.equals_lower(*Key))
+ continue;
+
+ // If so, we have a match in the hash table. Construct the destination
+ // path.
+ Optional<StringRef> Prefix = getString(B.Prefix);
+ Optional<StringRef> Suffix = getString(B.Suffix);
+
+ DestPath.clear();
+ if (LLVM_LIKELY(Prefix && Suffix)) {
+ DestPath.append(Prefix->begin(), Prefix->end());
+ DestPath.append(Suffix->begin(), Suffix->end());
+ }
+ return StringRef(DestPath.begin(), DestPath.size());
+ }
+}
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
new file mode 100644
index 000000000000..f0c5900c8ce4
--- /dev/null
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -0,0 +1,1801 @@
+//===- HeaderSearch.cpp - Resolve Header File Locations -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DirectoryLookup and HeaderSearch interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/DirectoryLookup.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/ModuleMap.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace clang;
+
+#define DEBUG_TYPE "file-search"
+
+ALWAYS_ENABLED_STATISTIC(NumIncluded, "Number of attempted #includes.");
+ALWAYS_ENABLED_STATISTIC(
+ NumMultiIncludeFileOptzn,
+ "Number of #includes skipped due to the multi-include optimization.");
+ALWAYS_ENABLED_STATISTIC(NumFrameworkLookups, "Number of framework lookups.");
+ALWAYS_ENABLED_STATISTIC(NumSubFrameworkLookups,
+ "Number of subframework lookups.");
+
+const IdentifierInfo *
+HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) {
+ if (ControllingMacro) {
+ if (ControllingMacro->isOutOfDate()) {
+ assert(External && "We must have an external source if we have a "
+ "controlling macro that is out of date.");
+ External->updateOutOfDateIdentifier(
+ *const_cast<IdentifierInfo *>(ControllingMacro));
+ }
+ return ControllingMacro;
+ }
+
+ if (!ControllingMacroID || !External)
+ return nullptr;
+
+ ControllingMacro = External->GetIdentifier(ControllingMacroID);
+ return ControllingMacro;
+}
+
+ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() = default;
+
+HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
+ SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+ const LangOptions &LangOpts,
+ const TargetInfo *Target)
+ : HSOpts(std::move(HSOpts)), Diags(Diags),
+ FileMgr(SourceMgr.getFileManager()), FrameworkMap(64),
+ ModMap(SourceMgr, Diags, LangOpts, Target, *this) {}
+
+void HeaderSearch::PrintStats() {
+ llvm::errs() << "\n*** HeaderSearch Stats:\n"
+ << FileInfo.size() << " files tracked.\n";
+ unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+ for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+ NumOnceOnlyFiles += FileInfo[i].isImport;
+ if (MaxNumIncludes < FileInfo[i].NumIncludes)
+ MaxNumIncludes = FileInfo[i].NumIncludes;
+ NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+ }
+ llvm::errs() << " " << NumOnceOnlyFiles << " #import/#pragma once files.\n"
+ << " " << NumSingleIncludedFiles << " included exactly once.\n"
+ << " " << MaxNumIncludes << " max times a file is included.\n";
+
+ llvm::errs() << " " << NumIncluded << " #include/#include_next/#import.\n"
+ << " " << NumMultiIncludeFileOptzn
+ << " #includes skipped due to the multi-include optimization.\n";
+
+ llvm::errs() << NumFrameworkLookups << " framework lookups.\n"
+ << NumSubFrameworkLookups << " subframework lookups.\n";
+}
+
+/// CreateHeaderMap - This method returns a HeaderMap for the specified
+/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
+const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
+ // We expect the number of headermaps to be small, and almost always empty.
+ // If it ever grows, use of a linear search should be re-evaluated.
+ if (!HeaderMaps.empty()) {
+ for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+ // Pointer equality comparison of FileEntries works because they are
+ // already uniqued by inode.
+ if (HeaderMaps[i].first == FE)
+ return HeaderMaps[i].second.get();
+ }
+
+ if (std::unique_ptr<HeaderMap> HM = HeaderMap::Create(FE, FileMgr)) {
+ HeaderMaps.emplace_back(FE, std::move(HM));
+ return HeaderMaps.back().second.get();
+ }
+
+ return nullptr;
+}
+
+/// Get filenames for all registered header maps.
+void HeaderSearch::getHeaderMapFileNames(
+ SmallVectorImpl<std::string> &Names) const {
+ for (auto &HM : HeaderMaps)
+ Names.push_back(HM.first->getName());
+}
+
+std::string HeaderSearch::getCachedModuleFileName(Module *Module) {
+ const FileEntry *ModuleMap =
+ getModuleMap().getModuleMapFileForUniquing(Module);
+ return getCachedModuleFileName(Module->Name, ModuleMap->getName());
+}
+
+std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,
+ bool FileMapOnly) {
+ // First check the module name to pcm file map.
+ auto i (HSOpts->PrebuiltModuleFiles.find(ModuleName));
+ if (i != HSOpts->PrebuiltModuleFiles.end())
+ return i->second;
+
+ if (FileMapOnly || HSOpts->PrebuiltModulePaths.empty())
+ return {};
+
+ // Then go through each prebuilt module directory and try to find the pcm
+ // file.
+ for (const std::string &Dir : HSOpts->PrebuiltModulePaths) {
+ SmallString<256> Result(Dir);
+ llvm::sys::fs::make_absolute(Result);
+ llvm::sys::path::append(Result, ModuleName + ".pcm");
+ if (getFileMgr().getFile(Result.str()))
+ return Result.str().str();
+ }
+ return {};
+}
+
+std::string HeaderSearch::getCachedModuleFileName(StringRef ModuleName,
+ StringRef ModuleMapPath) {
+ // If we don't have a module cache path or aren't supposed to use one, we
+ // can't do anything.
+ if (getModuleCachePath().empty())
+ return {};
+
+ SmallString<256> Result(getModuleCachePath());
+ llvm::sys::fs::make_absolute(Result);
+
+ if (HSOpts->DisableModuleHash) {
+ llvm::sys::path::append(Result, ModuleName + ".pcm");
+ } else {
+ // Construct the name <ModuleName>-<hash of ModuleMapPath>.pcm which should
+ // ideally be globally unique to this particular module. Name collisions
+ // in the hash are safe (because any translation unit can only import one
+ // module with each name), but result in a loss of caching.
+ //
+ // To avoid false-negatives, we form as canonical a path as we can, and map
+ // to lower-case in case we're on a case-insensitive file system.
+ std::string Parent = llvm::sys::path::parent_path(ModuleMapPath);
+ if (Parent.empty())
+ Parent = ".";
+ auto Dir = FileMgr.getDirectory(Parent);
+ if (!Dir)
+ return {};
+ auto DirName = FileMgr.getCanonicalName(*Dir);
+ auto FileName = llvm::sys::path::filename(ModuleMapPath);
+
+ llvm::hash_code Hash =
+ llvm::hash_combine(DirName.lower(), FileName.lower());
+
+ SmallString<128> HashStr;
+ llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);
+ llvm::sys::path::append(Result, ModuleName + "-" + HashStr + ".pcm");
+ }
+ return Result.str().str();
+}
+
+Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch,
+ bool AllowExtraModuleMapSearch) {
+ // Look in the module map to determine if there is a module by this name.
+ Module *Module = ModMap.findModule(ModuleName);
+ if (Module || !AllowSearch || !HSOpts->ImplicitModuleMaps)
+ return Module;
+
+ StringRef SearchName = ModuleName;
+ Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+
+ // The facility for "private modules" -- adjacent, optional module maps named
+ // module.private.modulemap that are supposed to define private submodules --
+ // may have different flavors of names: FooPrivate, Foo_Private and Foo.Private.
+ //
+ // Foo.Private is now deprecated in favor of Foo_Private. Users of FooPrivate
+ // should also rename to Foo_Private. Representing private as submodules
+ // could force building unwanted dependencies into the parent module and cause
+ // dependency cycles.
+ if (!Module && SearchName.consume_back("_Private"))
+ Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+ if (!Module && SearchName.consume_back("Private"))
+ Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+ return Module;
+}
+
+Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
+ bool AllowExtraModuleMapSearch) {
+ Module *Module = nullptr;
+
+ // Look through the various header search paths to load any available module
+ // maps, searching for a module map that describes this module.
+ for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+ if (SearchDirs[Idx].isFramework()) {
+ // Search for or infer a module map for a framework. Here we use
+ // SearchName rather than ModuleName, to permit finding private modules
+ // named FooPrivate in buggy frameworks named Foo.
+ SmallString<128> FrameworkDirName;
+ FrameworkDirName += SearchDirs[Idx].getFrameworkDir()->getName();
+ llvm::sys::path::append(FrameworkDirName, SearchName + ".framework");
+ if (auto FrameworkDir = FileMgr.getDirectory(FrameworkDirName)) {
+ bool IsSystem
+ = SearchDirs[Idx].getDirCharacteristic() != SrcMgr::C_User;
+ Module = loadFrameworkModule(ModuleName, *FrameworkDir, IsSystem);
+ if (Module)
+ break;
+ }
+ }
+
+ // FIXME: Figure out how header maps and module maps will work together.
+
+ // Only deal with normal search directories.
+ if (!SearchDirs[Idx].isNormalDir())
+ continue;
+
+ bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory();
+ // Search for a module map file in this directory.
+ if (loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem,
+ /*IsFramework*/false) == LMM_NewlyLoaded) {
+ // We just loaded a module map file; check whether the module is
+ // available now.
+ Module = ModMap.findModule(ModuleName);
+ if (Module)
+ break;
+ }
+
+ // Search for a module map in a subdirectory with the same name as the
+ // module.
+ SmallString<128> NestedModuleMapDirName;
+ NestedModuleMapDirName = SearchDirs[Idx].getDir()->getName();
+ llvm::sys::path::append(NestedModuleMapDirName, ModuleName);
+ if (loadModuleMapFile(NestedModuleMapDirName, IsSystem,
+ /*IsFramework*/false) == LMM_NewlyLoaded){
+ // If we just loaded a module map file, look for the module again.
+ Module = ModMap.findModule(ModuleName);
+ if (Module)
+ break;
+ }
+
+ // If we've already performed the exhaustive search for module maps in this
+ // search directory, don't do it again.
+ if (SearchDirs[Idx].haveSearchedAllModuleMaps())
+ continue;
+
+ // Load all module maps in the immediate subdirectories of this search
+ // directory if ModuleName was from @import.
+ if (AllowExtraModuleMapSearch)
+ loadSubdirectoryModuleMaps(SearchDirs[Idx]);
+
+ // Look again for the module.
+ Module = ModMap.findModule(ModuleName);
+ if (Module)
+ break;
+ }
+
+ return Module;
+}
+
+//===----------------------------------------------------------------------===//
+// File lookup within a DirectoryLookup scope
+//===----------------------------------------------------------------------===//
+
+/// getName - Return the directory or filename corresponding to this lookup
+/// object.
+StringRef DirectoryLookup::getName() const {
+ // FIXME: Use the name from \c DirectoryEntryRef.
+ if (isNormalDir())
+ return getDir()->getName();
+ if (isFramework())
+ return getFrameworkDir()->getName();
+ assert(isHeaderMap() && "Unknown DirectoryLookup");
+ return getHeaderMap()->getFileName();
+}
+
+Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule(
+ StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir,
+ bool IsSystemHeaderDir, Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule) {
+ // If we have a module map that might map this header, load it and
+ // check whether we'll have a suggestion for a module.
+ auto File = getFileMgr().getFileRef(FileName, /*OpenFile=*/true);
+ if (!File) {
+ // For rare, surprising errors (e.g. "out of file handles"), diag the EC
+ // message.
+ std::error_code EC = llvm::errorToErrorCode(File.takeError());
+ if (EC != llvm::errc::no_such_file_or_directory &&
+ EC != llvm::errc::invalid_argument &&
+ EC != llvm::errc::is_a_directory && EC != llvm::errc::not_a_directory) {
+ Diags.Report(IncludeLoc, diag::err_cannot_open_file)
+ << FileName << EC.message();
+ }
+ return None;
+ }
+
+ // If there is a module that corresponds to this header, suggest it.
+ if (!findUsableModuleForHeader(
+ &File->getFileEntry(), Dir ? Dir : File->getFileEntry().getDir(),
+ RequestingModule, SuggestedModule, IsSystemHeaderDir))
+ return None;
+
+ return *File;
+}
+
+/// LookupFile - Lookup the specified file in this search path, returning it
+/// if it exists or returning null if not.
+Optional<FileEntryRef> DirectoryLookup::LookupFile(
+ StringRef &Filename, HeaderSearch &HS, SourceLocation IncludeLoc,
+ SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,
+ Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,
+ bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound,
+ bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName) const {
+ InUserSpecifiedSystemFramework = false;
+ IsInHeaderMap = false;
+ MappedName.clear();
+
+ SmallString<1024> TmpDir;
+ if (isNormalDir()) {
+ // Concatenate the requested file onto the directory.
+ TmpDir = getDir()->getName();
+ llvm::sys::path::append(TmpDir, Filename);
+ if (SearchPath) {
+ StringRef SearchPathRef(getDir()->getName());
+ SearchPath->clear();
+ SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
+ }
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin(), Filename.end());
+ }
+
+ return HS.getFileAndSuggestModule(TmpDir, IncludeLoc, getDir(),
+ isSystemHeaderDirectory(),
+ RequestingModule, SuggestedModule);
+ }
+
+ if (isFramework())
+ return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath,
+ RequestingModule, SuggestedModule,
+ InUserSpecifiedSystemFramework, IsFrameworkFound);
+
+ assert(isHeaderMap() && "Unknown directory lookup");
+ const HeaderMap *HM = getHeaderMap();
+ SmallString<1024> Path;
+ StringRef Dest = HM->lookupFilename(Filename, Path);
+ if (Dest.empty())
+ return None;
+
+ IsInHeaderMap = true;
+
+ auto FixupSearchPath = [&]() {
+ if (SearchPath) {
+ StringRef SearchPathRef(getName());
+ SearchPath->clear();
+ SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
+ }
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin(), Filename.end());
+ }
+ };
+
+ // Check if the headermap maps the filename to a framework include
+ // ("Foo.h" -> "Foo/Foo.h"), in which case continue header lookup using the
+ // framework include.
+ if (llvm::sys::path::is_relative(Dest)) {
+ MappedName.append(Dest.begin(), Dest.end());
+ Filename = StringRef(MappedName.begin(), MappedName.size());
+ Optional<FileEntryRef> Result = HM->LookupFile(Filename, HS.getFileMgr());
+ if (Result) {
+ FixupSearchPath();
+ return *Result;
+ }
+ } else if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) {
+ FixupSearchPath();
+ return *Res;
+ }
+
+ return None;
+}
+
+/// Given a framework directory, find the top-most framework directory.
+///
+/// \param FileMgr The file manager to use for directory lookups.
+/// \param DirName The name of the framework directory.
+/// \param SubmodulePath Will be populated with the submodule path from the
+/// returned top-level module to the originally named framework.
+static const DirectoryEntry *
+getTopFrameworkDir(FileManager &FileMgr, StringRef DirName,
+ SmallVectorImpl<std::string> &SubmodulePath) {
+ assert(llvm::sys::path::extension(DirName) == ".framework" &&
+ "Not a framework directory");
+
+ // Note: as an egregious but useful hack we use the real path here, because
+ // frameworks moving between top-level frameworks to embedded frameworks tend
+ // to be symlinked, and we base the logical structure of modules on the
+ // physical layout. In particular, we need to deal with crazy includes like
+ //
+ // #include <Foo/Frameworks/Bar.framework/Headers/Wibble.h>
+ //
+ // where 'Bar' used to be embedded in 'Foo', is now a top-level framework
+ // which one should access with, e.g.,
+ //
+ // #include <Bar/Wibble.h>
+ //
+ // Similar issues occur when a top-level framework has moved into an
+ // embedded framework.
+ const DirectoryEntry *TopFrameworkDir = nullptr;
+ if (auto TopFrameworkDirOrErr = FileMgr.getDirectory(DirName))
+ TopFrameworkDir = *TopFrameworkDirOrErr;
+
+ if (TopFrameworkDir)
+ DirName = FileMgr.getCanonicalName(TopFrameworkDir);
+ do {
+ // Get the parent directory name.
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.empty())
+ break;
+
+ // Determine whether this directory exists.
+ auto Dir = FileMgr.getDirectory(DirName);
+ if (!Dir)
+ break;
+
+ // If this is a framework directory, then we're a subframework of this
+ // framework.
+ if (llvm::sys::path::extension(DirName) == ".framework") {
+ SubmodulePath.push_back(llvm::sys::path::stem(DirName));
+ TopFrameworkDir = *Dir;
+ }
+ } while (true);
+
+ return TopFrameworkDir;
+}
+
+static bool needModuleLookup(Module *RequestingModule,
+ bool HasSuggestedModule) {
+ return HasSuggestedModule ||
+ (RequestingModule && RequestingModule->NoUndeclaredIncludes);
+}
+
+/// DoFrameworkLookup - Do a lookup of the specified file in the current
+/// DirectoryLookup, which is a framework directory.
+Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(
+ StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath, Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule,
+ bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound) const {
+ FileManager &FileMgr = HS.getFileMgr();
+
+ // Framework names must have a '/' in the filename.
+ size_t SlashPos = Filename.find('/');
+ if (SlashPos == StringRef::npos)
+ return None;
+
+ // Find out if this is the home for the specified framework, by checking
+ // HeaderSearch. Possible answers are yes/no and unknown.
+ FrameworkCacheEntry &CacheEntry =
+ HS.LookupFrameworkCache(Filename.substr(0, SlashPos));
+
+ // If it is known and in some other directory, fail.
+ if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDir())
+ return None;
+
+ // Otherwise, construct the path to this framework dir.
+
+ // FrameworkName = "/System/Library/Frameworks/"
+ SmallString<1024> FrameworkName;
+ FrameworkName += getFrameworkDirRef()->getName();
+ if (FrameworkName.empty() || FrameworkName.back() != '/')
+ FrameworkName.push_back('/');
+
+ // FrameworkName = "/System/Library/Frameworks/Cocoa"
+ StringRef ModuleName(Filename.begin(), SlashPos);
+ FrameworkName += ModuleName;
+
+ // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/"
+ FrameworkName += ".framework/";
+
+ // If the cache entry was unresolved, populate it now.
+ if (!CacheEntry.Directory) {
+ ++NumFrameworkLookups;
+
+ // If the framework dir doesn't exist, we fail.
+ auto Dir = FileMgr.getDirectory(FrameworkName);
+ if (!Dir)
+ return None;
+
+ // Otherwise, if it does, remember that this is the right direntry for this
+ // framework.
+ CacheEntry.Directory = getFrameworkDir();
+
+ // If this is a user search directory, check if the framework has been
+ // user-specified as a system framework.
+ if (getDirCharacteristic() == SrcMgr::C_User) {
+ SmallString<1024> SystemFrameworkMarker(FrameworkName);
+ SystemFrameworkMarker += ".system_framework";
+ if (llvm::sys::fs::exists(SystemFrameworkMarker)) {
+ CacheEntry.IsUserSpecifiedSystemFramework = true;
+ }
+ }
+ }
+
+ // Set out flags.
+ InUserSpecifiedSystemFramework = CacheEntry.IsUserSpecifiedSystemFramework;
+ IsFrameworkFound = CacheEntry.Directory;
+
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin()+SlashPos+1, Filename.end());
+ }
+
+ // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
+ unsigned OrigSize = FrameworkName.size();
+
+ FrameworkName += "Headers/";
+
+ if (SearchPath) {
+ SearchPath->clear();
+ // Without trailing '/'.
+ SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);
+ }
+
+ FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
+
+ auto File =
+ FileMgr.getOptionalFileRef(FrameworkName, /*OpenFile=*/!SuggestedModule);
+ if (!File) {
+ // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+ const char *Private = "Private";
+ FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+ Private+strlen(Private));
+ if (SearchPath)
+ SearchPath->insert(SearchPath->begin()+OrigSize, Private,
+ Private+strlen(Private));
+
+ File = FileMgr.getOptionalFileRef(FrameworkName,
+ /*OpenFile=*/!SuggestedModule);
+ }
+
+ // If we found the header and are allowed to suggest a module, do so now.
+ if (File && needModuleLookup(RequestingModule, SuggestedModule)) {
+ // Find the framework in which this header occurs.
+ StringRef FrameworkPath = File->getFileEntry().getDir()->getName();
+ bool FoundFramework = false;
+ do {
+ // Determine whether this directory exists.
+ auto Dir = FileMgr.getDirectory(FrameworkPath);
+ if (!Dir)
+ break;
+
+ // If this is a framework directory, then we're a subframework of this
+ // framework.
+ if (llvm::sys::path::extension(FrameworkPath) == ".framework") {
+ FoundFramework = true;
+ break;
+ }
+
+ // Get the parent directory name.
+ FrameworkPath = llvm::sys::path::parent_path(FrameworkPath);
+ if (FrameworkPath.empty())
+ break;
+ } while (true);
+
+ bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
+ if (FoundFramework) {
+ if (!HS.findUsableModuleForFrameworkHeader(
+ &File->getFileEntry(), FrameworkPath, RequestingModule,
+ SuggestedModule, IsSystem))
+ return None;
+ } else {
+ if (!HS.findUsableModuleForHeader(&File->getFileEntry(), getDir(),
+ RequestingModule, SuggestedModule,
+ IsSystem))
+ return None;
+ }
+ }
+ if (File)
+ return *File;
+ return None;
+}
+
+void HeaderSearch::setTarget(const TargetInfo &Target) {
+ ModMap.setTarget(Target);
+}
+
+//===----------------------------------------------------------------------===//
+// Header File Location.
+//===----------------------------------------------------------------------===//
+
+/// Return true with a diagnostic if the file that MSVC would have found
+/// fails to match the one that Clang would have found with MSVC header search
+/// disabled.
+static bool checkMSVCHeaderSearch(DiagnosticsEngine &Diags,
+ const FileEntry *MSFE, const FileEntry *FE,
+ SourceLocation IncludeLoc) {
+ if (MSFE && FE != MSFE) {
+ Diags.Report(IncludeLoc, diag::ext_pp_include_search_ms) << MSFE->getName();
+ return true;
+ }
+ return false;
+}
+
+static const char *copyString(StringRef Str, llvm::BumpPtrAllocator &Alloc) {
+ assert(!Str.empty());
+ char *CopyStr = Alloc.Allocate<char>(Str.size()+1);
+ std::copy(Str.begin(), Str.end(), CopyStr);
+ CopyStr[Str.size()] = '\0';
+ return CopyStr;
+}
+
+static bool isFrameworkStylePath(StringRef Path, bool &IsPrivateHeader,
+ SmallVectorImpl<char> &FrameworkName) {
+ using namespace llvm::sys;
+ path::const_iterator I = path::begin(Path);
+ path::const_iterator E = path::end(Path);
+ IsPrivateHeader = false;
+
+ // Detect different types of framework style paths:
+ //
+ // ...Foo.framework/{Headers,PrivateHeaders}
+ // ...Foo.framework/Versions/{A,Current}/{Headers,PrivateHeaders}
+ // ...Foo.framework/Frameworks/Nested.framework/{Headers,PrivateHeaders}
+ // ...<other variations with 'Versions' like in the above path>
+ //
+ // and some other variations among these lines.
+ int FoundComp = 0;
+ while (I != E) {
+ if (*I == "Headers")
+ ++FoundComp;
+ if (I->endswith(".framework")) {
+ FrameworkName.append(I->begin(), I->end());
+ ++FoundComp;
+ }
+ if (*I == "PrivateHeaders") {
+ ++FoundComp;
+ IsPrivateHeader = true;
+ }
+ ++I;
+ }
+
+ return !FrameworkName.empty() && FoundComp >= 2;
+}
+
+static void
+diagnoseFrameworkInclude(DiagnosticsEngine &Diags, SourceLocation IncludeLoc,
+ StringRef Includer, StringRef IncludeFilename,
+ const FileEntry *IncludeFE, bool isAngled = false,
+ bool FoundByHeaderMap = false) {
+ bool IsIncluderPrivateHeader = false;
+ SmallString<128> FromFramework, ToFramework;
+ if (!isFrameworkStylePath(Includer, IsIncluderPrivateHeader, FromFramework))
+ return;
+ bool IsIncludeePrivateHeader = false;
+ bool IsIncludeeInFramework = isFrameworkStylePath(
+ IncludeFE->getName(), IsIncludeePrivateHeader, ToFramework);
+
+ if (!isAngled && !FoundByHeaderMap) {
+ SmallString<128> NewInclude("<");
+ if (IsIncludeeInFramework) {
+ NewInclude += StringRef(ToFramework).drop_back(10); // drop .framework
+ NewInclude += "/";
+ }
+ NewInclude += IncludeFilename;
+ NewInclude += ">";
+ Diags.Report(IncludeLoc, diag::warn_quoted_include_in_framework_header)
+ << IncludeFilename
+ << FixItHint::CreateReplacement(IncludeLoc, NewInclude);
+ }
+
+ // Headers in Foo.framework/Headers should not include headers
+ // from Foo.framework/PrivateHeaders, since this violates public/private
+ // API boundaries and can cause modular dependency cycles.
+ if (!IsIncluderPrivateHeader && IsIncludeeInFramework &&
+ IsIncludeePrivateHeader && FromFramework == ToFramework)
+ Diags.Report(IncludeLoc, diag::warn_framework_include_private_from_public)
+ << IncludeFilename;
+}
+
+/// LookupFile - Given a "foo" or \<foo> reference, look up the indicated file,
+/// return null on failure. isAngled indicates whether the file reference is
+/// for system \#include's or not (i.e. using <> instead of ""). Includers, if
+/// non-empty, indicates where the \#including file(s) are, in case a relative
+/// search is needed. Microsoft mode will pass all \#including files.
+Optional<FileEntryRef> HeaderSearch::LookupFile(
+ StringRef Filename, SourceLocation IncludeLoc, bool isAngled,
+ const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir,
+ ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers,
+ SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,
+ Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,
+ bool *IsMapped, bool *IsFrameworkFound, bool SkipCache,
+ bool BuildSystemModule) {
+ if (IsMapped)
+ *IsMapped = false;
+
+ if (IsFrameworkFound)
+ *IsFrameworkFound = false;
+
+ if (SuggestedModule)
+ *SuggestedModule = ModuleMap::KnownHeader();
+
+ // If 'Filename' is absolute, check to see if it exists and no searching.
+ if (llvm::sys::path::is_absolute(Filename)) {
+ CurDir = nullptr;
+
+ // If this was an #include_next "/absolute/file", fail.
+ if (FromDir)
+ return None;
+
+ if (SearchPath)
+ SearchPath->clear();
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin(), Filename.end());
+ }
+ // Otherwise, just return the file.
+ return getFileAndSuggestModule(Filename, IncludeLoc, nullptr,
+ /*IsSystemHeaderDir*/false,
+ RequestingModule, SuggestedModule);
+ }
+
+ // This is the header that MSVC's header search would have found.
+ ModuleMap::KnownHeader MSSuggestedModule;
+ const FileEntry *MSFE_FE = nullptr;
+ StringRef MSFE_Name;
+
+ // Unless disabled, check to see if the file is in the #includer's
+ // directory. This cannot be based on CurDir, because each includer could be
+ // a #include of a subdirectory (#include "foo/bar.h") and a subsequent
+ // include of "baz.h" should resolve to "whatever/foo/baz.h".
+ // This search is not done for <> headers.
+ if (!Includers.empty() && !isAngled && !NoCurDirSearch) {
+ SmallString<1024> TmpDir;
+ bool First = true;
+ for (const auto &IncluderAndDir : Includers) {
+ const FileEntry *Includer = IncluderAndDir.first;
+
+ // Concatenate the requested file onto the directory.
+ // FIXME: Portability. Filename concatenation should be in sys::Path.
+ TmpDir = IncluderAndDir.second->getName();
+ TmpDir.push_back('/');
+ TmpDir.append(Filename.begin(), Filename.end());
+
+ // FIXME: We don't cache the result of getFileInfo across the call to
+ // getFileAndSuggestModule, because it's a reference to an element of
+ // a container that could be reallocated across this call.
+ //
+ // If we have no includer, that means we're processing a #include
+ // from a module build. We should treat this as a system header if we're
+ // building a [system] module.
+ bool IncluderIsSystemHeader =
+ Includer ? getFileInfo(Includer).DirInfo != SrcMgr::C_User :
+ BuildSystemModule;
+ if (Optional<FileEntryRef> FE = getFileAndSuggestModule(
+ TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
+ RequestingModule, SuggestedModule)) {
+ if (!Includer) {
+ assert(First && "only first includer can have no file");
+ return FE;
+ }
+
+ // Leave CurDir unset.
+ // This file is a system header or C++ unfriendly if the old file is.
+ //
+ // Note that we only use one of FromHFI/ToHFI at once, due to potential
+ // reallocation of the underlying vector potentially making the first
+ // reference binding dangling.
+ HeaderFileInfo &FromHFI = getFileInfo(Includer);
+ unsigned DirInfo = FromHFI.DirInfo;
+ bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader;
+ StringRef Framework = FromHFI.Framework;
+
+ HeaderFileInfo &ToHFI = getFileInfo(&FE->getFileEntry());
+ ToHFI.DirInfo = DirInfo;
+ ToHFI.IndexHeaderMapHeader = IndexHeaderMapHeader;
+ ToHFI.Framework = Framework;
+
+ if (SearchPath) {
+ StringRef SearchPathRef(IncluderAndDir.second->getName());
+ SearchPath->clear();
+ SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
+ }
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin(), Filename.end());
+ }
+ if (First) {
+ diagnoseFrameworkInclude(Diags, IncludeLoc,
+ IncluderAndDir.second->getName(), Filename,
+ &FE->getFileEntry());
+ return FE;
+ }
+
+ // Otherwise, we found the path via MSVC header search rules. If
+ // -Wmsvc-include is enabled, we have to keep searching to see if we
+ // would've found this header in -I or -isystem directories.
+ if (Diags.isIgnored(diag::ext_pp_include_search_ms, IncludeLoc)) {
+ return FE;
+ } else {
+ MSFE_FE = &FE->getFileEntry();
+ MSFE_Name = FE->getName();
+ if (SuggestedModule) {
+ MSSuggestedModule = *SuggestedModule;
+ *SuggestedModule = ModuleMap::KnownHeader();
+ }
+ break;
+ }
+ }
+ First = false;
+ }
+ }
+
+ Optional<FileEntryRef> MSFE(MSFE_FE ? FileEntryRef(MSFE_Name, *MSFE_FE)
+ : Optional<FileEntryRef>());
+
+ CurDir = nullptr;
+
+ // If this is a system #include, ignore the user #include locs.
+ unsigned i = isAngled ? AngledDirIdx : 0;
+
+ // If this is a #include_next request, start searching after the directory the
+ // file was found in.
+ if (FromDir)
+ i = FromDir-&SearchDirs[0];
+
+ // Cache all of the lookups performed by this method. Many headers are
+ // multiply included, and the "pragma once" optimization prevents them from
+ // being relex/pp'd, but they would still have to search through a
+ // (potentially huge) series of SearchDirs to find it.
+ LookupFileCacheInfo &CacheLookup = LookupFileCache[Filename];
+
+ // If the entry has been previously looked up, the first value will be
+ // non-zero. If the value is equal to i (the start point of our search), then
+ // this is a matching hit.
+ if (!SkipCache && CacheLookup.StartIdx == i+1) {
+ // Skip querying potentially lots of directories for this lookup.
+ i = CacheLookup.HitIdx;
+ if (CacheLookup.MappedName) {
+ Filename = CacheLookup.MappedName;
+ if (IsMapped)
+ *IsMapped = true;
+ }
+ } else {
+ // Otherwise, this is the first query, or the previous query didn't match
+ // our search start. We will fill in our found location below, so prime the
+ // start point value.
+ CacheLookup.reset(/*StartIdx=*/i+1);
+ }
+
+ SmallString<64> MappedName;
+
+ // Check each directory in sequence to see if it contains this file.
+ for (; i != SearchDirs.size(); ++i) {
+ bool InUserSpecifiedSystemFramework = false;
+ bool IsInHeaderMap = false;
+ bool IsFrameworkFoundInDir = false;
+ Optional<FileEntryRef> File = SearchDirs[i].LookupFile(
+ Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule,
+ SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir,
+ IsInHeaderMap, MappedName);
+ if (!MappedName.empty()) {
+ assert(IsInHeaderMap && "MappedName should come from a header map");
+ CacheLookup.MappedName =
+ copyString(MappedName, LookupFileCache.getAllocator());
+ }
+ if (IsMapped)
+ // A filename is mapped when a header map remapped it to a relative path
+ // used in subsequent header search or to an absolute path pointing to an
+ // existing file.
+ *IsMapped |= (!MappedName.empty() || (IsInHeaderMap && File));
+ if (IsFrameworkFound)
+ // Because we keep a filename remapped for subsequent search directory
+ // lookups, ignore IsFrameworkFoundInDir after the first remapping and not
+ // just for remapping in a current search directory.
+ *IsFrameworkFound |= (IsFrameworkFoundInDir && !CacheLookup.MappedName);
+ if (!File)
+ continue;
+
+ CurDir = &SearchDirs[i];
+
+ // This file is a system header or C++ unfriendly if the dir is.
+ HeaderFileInfo &HFI = getFileInfo(&File->getFileEntry());
+ HFI.DirInfo = CurDir->getDirCharacteristic();
+
+ // If the directory characteristic is User but this framework was
+ // user-specified to be treated as a system framework, promote the
+ // characteristic.
+ if (HFI.DirInfo == SrcMgr::C_User && InUserSpecifiedSystemFramework)
+ HFI.DirInfo = SrcMgr::C_System;
+
+ // If the filename matches a known system header prefix, override
+ // whether the file is a system header.
+ for (unsigned j = SystemHeaderPrefixes.size(); j; --j) {
+ if (Filename.startswith(SystemHeaderPrefixes[j-1].first)) {
+ HFI.DirInfo = SystemHeaderPrefixes[j-1].second ? SrcMgr::C_System
+ : SrcMgr::C_User;
+ break;
+ }
+ }
+
+ // If this file is found in a header map and uses the framework style of
+ // includes, then this header is part of a framework we're building.
+ if (CurDir->isIndexHeaderMap()) {
+ size_t SlashPos = Filename.find('/');
+ if (SlashPos != StringRef::npos) {
+ HFI.IndexHeaderMapHeader = 1;
+ HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(),
+ SlashPos));
+ }
+ }
+
+ if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
+ &File->getFileEntry(), IncludeLoc)) {
+ if (SuggestedModule)
+ *SuggestedModule = MSSuggestedModule;
+ return MSFE;
+ }
+
+ bool FoundByHeaderMap = !IsMapped ? false : *IsMapped;
+ if (!Includers.empty())
+ diagnoseFrameworkInclude(
+ Diags, IncludeLoc, Includers.front().second->getName(), Filename,
+ &File->getFileEntry(), isAngled, FoundByHeaderMap);
+
+ // Remember this location for the next lookup we do.
+ CacheLookup.HitIdx = i;
+ return File;
+ }
+
+ // If we are including a file with a quoted include "foo.h" from inside
+ // a header in a framework that is currently being built, and we couldn't
+ // resolve "foo.h" any other way, change the include to <Foo/foo.h>, where
+ // "Foo" is the name of the framework in which the including header was found.
+ if (!Includers.empty() && Includers.front().first && !isAngled &&
+ Filename.find('/') == StringRef::npos) {
+ HeaderFileInfo &IncludingHFI = getFileInfo(Includers.front().first);
+ if (IncludingHFI.IndexHeaderMapHeader) {
+ SmallString<128> ScratchFilename;
+ ScratchFilename += IncludingHFI.Framework;
+ ScratchFilename += '/';
+ ScratchFilename += Filename;
+
+ Optional<FileEntryRef> File = LookupFile(
+ ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, CurDir,
+ Includers.front(), SearchPath, RelativePath, RequestingModule,
+ SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr);
+
+ if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
+ File ? &File->getFileEntry() : nullptr,
+ IncludeLoc)) {
+ if (SuggestedModule)
+ *SuggestedModule = MSSuggestedModule;
+ return MSFE;
+ }
+
+ LookupFileCacheInfo &CacheLookup = LookupFileCache[Filename];
+ CacheLookup.HitIdx = LookupFileCache[ScratchFilename].HitIdx;
+ // FIXME: SuggestedModule.
+ return File;
+ }
+ }
+
+ if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
+ nullptr, IncludeLoc)) {
+ if (SuggestedModule)
+ *SuggestedModule = MSSuggestedModule;
+ return MSFE;
+ }
+
+ // Otherwise, didn't find it. Remember we didn't find this.
+ CacheLookup.HitIdx = SearchDirs.size();
+ return None;
+}
+
+/// LookupSubframeworkHeader - Look up a subframework for the specified
+/// \#include file. For example, if \#include'ing <HIToolbox/HIToolbox.h> from
+/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox
+/// is a subframework within Carbon.framework. If so, return the FileEntry
+/// for the designated file, otherwise return null.
+Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(
+ StringRef Filename, const FileEntry *ContextFileEnt,
+ SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,
+ Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) {
+ assert(ContextFileEnt && "No context file?");
+
+ // Framework names must have a '/' in the filename. Find it.
+ // FIXME: Should we permit '\' on Windows?
+ size_t SlashPos = Filename.find('/');
+ if (SlashPos == StringRef::npos)
+ return None;
+
+ // Look up the base framework name of the ContextFileEnt.
+ StringRef ContextName = ContextFileEnt->getName();
+
+ // If the context info wasn't a framework, couldn't be a subframework.
+ const unsigned DotFrameworkLen = 10;
+ auto FrameworkPos = ContextName.find(".framework");
+ if (FrameworkPos == StringRef::npos ||
+ (ContextName[FrameworkPos + DotFrameworkLen] != '/' &&
+ ContextName[FrameworkPos + DotFrameworkLen] != '\\'))
+ return None;
+
+ SmallString<1024> FrameworkName(ContextName.data(), ContextName.data() +
+ FrameworkPos +
+ DotFrameworkLen + 1);
+
+ // Append Frameworks/HIToolbox.framework/
+ FrameworkName += "Frameworks/";
+ FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos);
+ FrameworkName += ".framework/";
+
+ auto &CacheLookup =
+ *FrameworkMap.insert(std::make_pair(Filename.substr(0, SlashPos),
+ FrameworkCacheEntry())).first;
+
+ // Some other location?
+ if (CacheLookup.second.Directory &&
+ CacheLookup.first().size() == FrameworkName.size() &&
+ memcmp(CacheLookup.first().data(), &FrameworkName[0],
+ CacheLookup.first().size()) != 0)
+ return None;
+
+ // Cache subframework.
+ if (!CacheLookup.second.Directory) {
+ ++NumSubFrameworkLookups;
+
+ // If the framework dir doesn't exist, we fail.
+ auto Dir = FileMgr.getDirectory(FrameworkName);
+ if (!Dir)
+ return None;
+
+ // Otherwise, if it does, remember that this is the right direntry for this
+ // framework.
+ CacheLookup.second.Directory = *Dir;
+ }
+
+
+ if (RelativePath) {
+ RelativePath->clear();
+ RelativePath->append(Filename.begin()+SlashPos+1, Filename.end());
+ }
+
+ // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h"
+ SmallString<1024> HeadersFilename(FrameworkName);
+ HeadersFilename += "Headers/";
+ if (SearchPath) {
+ SearchPath->clear();
+ // Without trailing '/'.
+ SearchPath->append(HeadersFilename.begin(), HeadersFilename.end()-1);
+ }
+
+ HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
+ auto File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true);
+ if (!File) {
+ // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
+ HeadersFilename = FrameworkName;
+ HeadersFilename += "PrivateHeaders/";
+ if (SearchPath) {
+ SearchPath->clear();
+ // Without trailing '/'.
+ SearchPath->append(HeadersFilename.begin(), HeadersFilename.end()-1);
+ }
+
+ HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
+ File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true);
+
+ if (!File)
+ return None;
+ }
+
+ // This file is a system header or C++ unfriendly if the old file is.
+ //
+ // Note that the temporary 'DirInfo' is required here, as either call to
+ // getFileInfo could resize the vector and we don't want to rely on order
+ // of evaluation.
+ unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
+ getFileInfo(&File->getFileEntry()).DirInfo = DirInfo;
+
+ FrameworkName.pop_back(); // remove the trailing '/'
+ if (!findUsableModuleForFrameworkHeader(&File->getFileEntry(), FrameworkName,
+ RequestingModule, SuggestedModule,
+ /*IsSystem*/ false))
+ return None;
+
+ return *File;
+}
+
+//===----------------------------------------------------------------------===//
+// File Info Management.
+//===----------------------------------------------------------------------===//
+
+/// Merge the header file info provided by \p OtherHFI into the current
+/// header file info (\p HFI)
+static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
+ const HeaderFileInfo &OtherHFI) {
+ assert(OtherHFI.External && "expected to merge external HFI");
+
+ HFI.isImport |= OtherHFI.isImport;
+ HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
+ HFI.isModuleHeader |= OtherHFI.isModuleHeader;
+ HFI.NumIncludes += OtherHFI.NumIncludes;
+
+ if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
+ HFI.ControllingMacro = OtherHFI.ControllingMacro;
+ HFI.ControllingMacroID = OtherHFI.ControllingMacroID;
+ }
+
+ HFI.DirInfo = OtherHFI.DirInfo;
+ HFI.External = (!HFI.IsValid || HFI.External);
+ HFI.IsValid = true;
+ HFI.IndexHeaderMapHeader = OtherHFI.IndexHeaderMapHeader;
+
+ if (HFI.Framework.empty())
+ HFI.Framework = OtherHFI.Framework;
+}
+
+/// getFileInfo - Return the HeaderFileInfo structure for the specified
+/// FileEntry.
+HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
+ if (FE->getUID() >= FileInfo.size())
+ FileInfo.resize(FE->getUID() + 1);
+
+ HeaderFileInfo *HFI = &FileInfo[FE->getUID()];
+ // FIXME: Use a generation count to check whether this is really up to date.
+ if (ExternalSource && !HFI->Resolved) {
+ HFI->Resolved = true;
+ auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
+
+ HFI = &FileInfo[FE->getUID()];
+ if (ExternalHFI.External)
+ mergeHeaderFileInfo(*HFI, ExternalHFI);
+ }
+
+ HFI->IsValid = true;
+ // We have local information about this header file, so it's no longer
+ // strictly external.
+ HFI->External = false;
+ return *HFI;
+}
+
+const HeaderFileInfo *
+HeaderSearch::getExistingFileInfo(const FileEntry *FE,
+ bool WantExternal) const {
+ // If we have an external source, ensure we have the latest information.
+ // FIXME: Use a generation count to check whether this is really up to date.
+ HeaderFileInfo *HFI;
+ if (ExternalSource) {
+ if (FE->getUID() >= FileInfo.size()) {
+ if (!WantExternal)
+ return nullptr;
+ FileInfo.resize(FE->getUID() + 1);
+ }
+
+ HFI = &FileInfo[FE->getUID()];
+ if (!WantExternal && (!HFI->IsValid || HFI->External))
+ return nullptr;
+ if (!HFI->Resolved) {
+ HFI->Resolved = true;
+ auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
+
+ HFI = &FileInfo[FE->getUID()];
+ if (ExternalHFI.External)
+ mergeHeaderFileInfo(*HFI, ExternalHFI);
+ }
+ } else if (FE->getUID() >= FileInfo.size()) {
+ return nullptr;
+ } else {
+ HFI = &FileInfo[FE->getUID()];
+ }
+
+ if (!HFI->IsValid || (HFI->External && !WantExternal))
+ return nullptr;
+
+ return HFI;
+}
+
+bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {
+ // Check if we've ever seen this file as a header.
+ if (auto *HFI = getExistingFileInfo(File))
+ return HFI->isPragmaOnce || HFI->isImport || HFI->ControllingMacro ||
+ HFI->ControllingMacroID;
+ return false;
+}
+
+void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE,
+ ModuleMap::ModuleHeaderRole Role,
+ bool isCompilingModuleHeader) {
+ bool isModularHeader = !(Role & ModuleMap::TextualHeader);
+
+ // Don't mark the file info as non-external if there's nothing to change.
+ if (!isCompilingModuleHeader) {
+ if (!isModularHeader)
+ return;
+ auto *HFI = getExistingFileInfo(FE);
+ if (HFI && HFI->isModuleHeader)
+ return;
+ }
+
+ auto &HFI = getFileInfo(FE);
+ HFI.isModuleHeader |= isModularHeader;
+ HFI.isCompilingModuleHeader |= isCompilingModuleHeader;
+}
+
+bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
+ const FileEntry *File, bool isImport,
+ bool ModulesEnabled, Module *M) {
+ ++NumIncluded; // Count # of attempted #includes.
+
+ // Get information about this file.
+ HeaderFileInfo &FileInfo = getFileInfo(File);
+
+ // FIXME: this is a workaround for the lack of proper modules-aware support
+ // for #import / #pragma once
+ auto TryEnterImported = [&]() -> bool {
+ if (!ModulesEnabled)
+ return false;
+ // Ensure FileInfo bits are up to date.
+ ModMap.resolveHeaderDirectives(File);
+ // Modules with builtins are special; multiple modules use builtins as
+ // modular headers, example:
+ //
+ // module stddef { header "stddef.h" export * }
+ //
+ // After module map parsing, this expands to:
+ //
+ // module stddef {
+ // header "/path_to_builtin_dirs/stddef.h"
+ // textual "stddef.h"
+ // }
+ //
+ // It's common that libc++ and system modules will both define such
+ // submodules. Make sure cached results for a builtin header won't
+ // prevent other builtin modules to potentially enter the builtin header.
+ // Note that builtins are header guarded and the decision to actually
+ // enter them is postponed to the controlling macros logic below.
+ bool TryEnterHdr = false;
+ if (FileInfo.isCompilingModuleHeader && FileInfo.isModuleHeader)
+ TryEnterHdr = File->getDir() == ModMap.getBuiltinDir() &&
+ ModuleMap::isBuiltinHeader(
+ llvm::sys::path::filename(File->getName()));
+
+ // Textual headers can be #imported from different modules. Since ObjC
+ // headers find in the wild might rely only on #import and do not contain
+ // controlling macros, be conservative and only try to enter textual headers
+ // if such macro is present.
+ if (!FileInfo.isModuleHeader &&
+ FileInfo.getControllingMacro(ExternalLookup))
+ TryEnterHdr = true;
+ return TryEnterHdr;
+ };
+
+ // If this is a #import directive, check that we have not already imported
+ // this header.
+ if (isImport) {
+ // If this has already been imported, don't import it again.
+ FileInfo.isImport = true;
+
+ // Has this already been #import'ed or #include'd?
+ if (FileInfo.NumIncludes && !TryEnterImported())
+ return false;
+ } else {
+ // Otherwise, if this is a #include of a file that was previously #import'd
+ // or if this is the second #include of a #pragma once file, ignore it.
+ if (FileInfo.isImport && !TryEnterImported())
+ return false;
+ }
+
+ // Next, check to see if the file is wrapped with #ifndef guards. If so, and
+ // if the macro that guards it is defined, we know the #include has no effect.
+ if (const IdentifierInfo *ControllingMacro
+ = FileInfo.getControllingMacro(ExternalLookup)) {
+ // If the header corresponds to a module, check whether the macro is already
+ // defined in that module rather than checking in the current set of visible
+ // modules.
+ if (M ? PP.isMacroDefinedInLocalModule(ControllingMacro, M)
+ : PP.isMacroDefined(ControllingMacro)) {
+ ++NumMultiIncludeFileOptzn;
+ return false;
+ }
+ }
+
+ // Increment the number of times this file has been included.
+ ++FileInfo.NumIncludes;
+
+ return true;
+}
+
+size_t HeaderSearch::getTotalMemory() const {
+ return SearchDirs.capacity()
+ + llvm::capacity_in_bytes(FileInfo)
+ + llvm::capacity_in_bytes(HeaderMaps)
+ + LookupFileCache.getAllocator().getTotalMemory()
+ + FrameworkMap.getAllocator().getTotalMemory();
+}
+
+StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
+ return FrameworkNames.insert(Framework).first->first();
+}
+
+bool HeaderSearch::hasModuleMap(StringRef FileName,
+ const DirectoryEntry *Root,
+ bool IsSystem) {
+ if (!HSOpts->ImplicitModuleMaps)
+ return false;
+
+ SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
+
+ StringRef DirName = FileName;
+ do {
+ // Get the parent directory name.
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.empty())
+ return false;
+
+ // Determine whether this directory exists.
+ auto Dir = FileMgr.getDirectory(DirName);
+ if (!Dir)
+ return false;
+
+ // Try to load the module map file in this directory.
+ switch (loadModuleMapFile(*Dir, IsSystem,
+ llvm::sys::path::extension((*Dir)->getName()) ==
+ ".framework")) {
+ case LMM_NewlyLoaded:
+ case LMM_AlreadyLoaded:
+ // Success. All of the directories we stepped through inherit this module
+ // map file.
+ for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I)
+ DirectoryHasModuleMap[FixUpDirectories[I]] = true;
+ return true;
+
+ case LMM_NoDirectory:
+ case LMM_InvalidModuleMap:
+ break;
+ }
+
+ // If we hit the top of our search, we're done.
+ if (*Dir == Root)
+ return false;
+
+ // Keep track of all of the directories we checked, so we can mark them as
+ // having module maps if we eventually do find a module map.
+ FixUpDirectories.push_back(*Dir);
+ } while (true);
+}
+
+ModuleMap::KnownHeader
+HeaderSearch::findModuleForHeader(const FileEntry *File,
+ bool AllowTextual) const {
+ if (ExternalSource) {
+ // Make sure the external source has handled header info about this file,
+ // which includes whether the file is part of a module.
+ (void)getExistingFileInfo(File);
+ }
+ return ModMap.findModuleForHeader(File, AllowTextual);
+}
+
+static bool suggestModule(HeaderSearch &HS, const FileEntry *File,
+ Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule) {
+ ModuleMap::KnownHeader Module =
+ HS.findModuleForHeader(File, /*AllowTextual*/true);
+ if (SuggestedModule)
+ *SuggestedModule = (Module.getRole() & ModuleMap::TextualHeader)
+ ? ModuleMap::KnownHeader()
+ : Module;
+
+ // If this module specifies [no_undeclared_includes], we cannot find any
+ // file that's in a non-dependency module.
+ if (RequestingModule && Module && RequestingModule->NoUndeclaredIncludes) {
+ HS.getModuleMap().resolveUses(RequestingModule, /*Complain*/false);
+ if (!RequestingModule->directlyUses(Module.getModule())) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool HeaderSearch::findUsableModuleForHeader(
+ const FileEntry *File, const DirectoryEntry *Root, Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule, bool IsSystemHeaderDir) {
+ if (File && needModuleLookup(RequestingModule, SuggestedModule)) {
+ // If there is a module that corresponds to this header, suggest it.
+ hasModuleMap(File->getName(), Root, IsSystemHeaderDir);
+ return suggestModule(*this, File, RequestingModule, SuggestedModule);
+ }
+ return true;
+}
+
+bool HeaderSearch::findUsableModuleForFrameworkHeader(
+ const FileEntry *File, StringRef FrameworkName, Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule, bool IsSystemFramework) {
+ // If we're supposed to suggest a module, look for one now.
+ if (needModuleLookup(RequestingModule, SuggestedModule)) {
+ // Find the top-level framework based on this framework.
+ SmallVector<std::string, 4> SubmodulePath;
+ const DirectoryEntry *TopFrameworkDir
+ = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath);
+
+ // Determine the name of the top-level framework.
+ StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());
+
+ // Load this framework module. If that succeeds, find the suggested module
+ // for this header, if any.
+ loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystemFramework);
+
+ // FIXME: This can find a module not part of ModuleName, which is
+ // important so that we're consistent about whether this header
+ // corresponds to a module. Possibly we should lock down framework modules
+ // so that this is not possible.
+ return suggestModule(*this, File, RequestingModule, SuggestedModule);
+ }
+ return true;
+}
+
+static const FileEntry *getPrivateModuleMap(const FileEntry *File,
+ FileManager &FileMgr) {
+ StringRef Filename = llvm::sys::path::filename(File->getName());
+ SmallString<128> PrivateFilename(File->getDir()->getName());
+ if (Filename == "module.map")
+ llvm::sys::path::append(PrivateFilename, "module_private.map");
+ else if (Filename == "module.modulemap")
+ llvm::sys::path::append(PrivateFilename, "module.private.modulemap");
+ else
+ return nullptr;
+ if (auto File = FileMgr.getFile(PrivateFilename))
+ return *File;
+ return nullptr;
+}
+
+bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem,
+ FileID ID, unsigned *Offset,
+ StringRef OriginalModuleMapFile) {
+ // Find the directory for the module. For frameworks, that may require going
+ // up from the 'Modules' directory.
+ const DirectoryEntry *Dir = nullptr;
+ if (getHeaderSearchOpts().ModuleMapFileHomeIsCwd) {
+ if (auto DirOrErr = FileMgr.getDirectory("."))
+ Dir = *DirOrErr;
+ } else {
+ if (!OriginalModuleMapFile.empty()) {
+ // We're building a preprocessed module map. Find or invent the directory
+ // that it originally occupied.
+ auto DirOrErr = FileMgr.getDirectory(
+ llvm::sys::path::parent_path(OriginalModuleMapFile));
+ if (DirOrErr) {
+ Dir = *DirOrErr;
+ } else {
+ auto *FakeFile = FileMgr.getVirtualFile(OriginalModuleMapFile, 0, 0);
+ Dir = FakeFile->getDir();
+ }
+ } else {
+ Dir = File->getDir();
+ }
+
+ StringRef DirName(Dir->getName());
+ if (llvm::sys::path::filename(DirName) == "Modules") {
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.endswith(".framework"))
+ if (auto DirOrErr = FileMgr.getDirectory(DirName))
+ Dir = *DirOrErr;
+ // FIXME: This assert can fail if there's a race between the above check
+ // and the removal of the directory.
+ assert(Dir && "parent must exist");
+ }
+ }
+
+ switch (loadModuleMapFileImpl(File, IsSystem, Dir, ID, Offset)) {
+ case LMM_AlreadyLoaded:
+ case LMM_NewlyLoaded:
+ return false;
+ case LMM_NoDirectory:
+ case LMM_InvalidModuleMap:
+ return true;
+ }
+ llvm_unreachable("Unknown load module map result");
+}
+
+HeaderSearch::LoadModuleMapResult
+HeaderSearch::loadModuleMapFileImpl(const FileEntry *File, bool IsSystem,
+ const DirectoryEntry *Dir, FileID ID,
+ unsigned *Offset) {
+ assert(File && "expected FileEntry");
+
+ // Check whether we've already loaded this module map, and mark it as being
+ // loaded in case we recursively try to load it from itself.
+ auto AddResult = LoadedModuleMaps.insert(std::make_pair(File, true));
+ if (!AddResult.second)
+ return AddResult.first->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;
+
+ if (ModMap.parseModuleMapFile(File, IsSystem, Dir, ID, Offset)) {
+ LoadedModuleMaps[File] = false;
+ return LMM_InvalidModuleMap;
+ }
+
+ // Try to load a corresponding private module map.
+ if (const FileEntry *PMMFile = getPrivateModuleMap(File, FileMgr)) {
+ if (ModMap.parseModuleMapFile(PMMFile, IsSystem, Dir)) {
+ LoadedModuleMaps[File] = false;
+ return LMM_InvalidModuleMap;
+ }
+ }
+
+ // This directory has a module map.
+ return LMM_NewlyLoaded;
+}
+
+const FileEntry *
+HeaderSearch::lookupModuleMapFile(const DirectoryEntry *Dir, bool IsFramework) {
+ if (!HSOpts->ImplicitModuleMaps)
+ return nullptr;
+ // For frameworks, the preferred spelling is Modules/module.modulemap, but
+ // module.map at the framework root is also accepted.
+ SmallString<128> ModuleMapFileName(Dir->getName());
+ if (IsFramework)
+ llvm::sys::path::append(ModuleMapFileName, "Modules");
+ llvm::sys::path::append(ModuleMapFileName, "module.modulemap");
+ if (auto F = FileMgr.getFile(ModuleMapFileName))
+ return *F;
+
+ // Continue to allow module.map
+ ModuleMapFileName = Dir->getName();
+ llvm::sys::path::append(ModuleMapFileName, "module.map");
+ if (auto F = FileMgr.getFile(ModuleMapFileName))
+ return *F;
+ return nullptr;
+}
+
+Module *HeaderSearch::loadFrameworkModule(StringRef Name,
+ const DirectoryEntry *Dir,
+ bool IsSystem) {
+ if (Module *Module = ModMap.findModule(Name))
+ return Module;
+
+ // Try to load a module map file.
+ switch (loadModuleMapFile(Dir, IsSystem, /*IsFramework*/true)) {
+ case LMM_InvalidModuleMap:
+ // Try to infer a module map from the framework directory.
+ if (HSOpts->ImplicitModuleMaps)
+ ModMap.inferFrameworkModule(Dir, IsSystem, /*Parent=*/nullptr);
+ break;
+
+ case LMM_AlreadyLoaded:
+ case LMM_NoDirectory:
+ return nullptr;
+
+ case LMM_NewlyLoaded:
+ break;
+ }
+
+ return ModMap.findModule(Name);
+}
+
+HeaderSearch::LoadModuleMapResult
+HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem,
+ bool IsFramework) {
+ if (auto Dir = FileMgr.getDirectory(DirName))
+ return loadModuleMapFile(*Dir, IsSystem, IsFramework);
+
+ return LMM_NoDirectory;
+}
+
+HeaderSearch::LoadModuleMapResult
+HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir, bool IsSystem,
+ bool IsFramework) {
+ auto KnownDir = DirectoryHasModuleMap.find(Dir);
+ if (KnownDir != DirectoryHasModuleMap.end())
+ return KnownDir->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;
+
+ if (const FileEntry *ModuleMapFile = lookupModuleMapFile(Dir, IsFramework)) {
+ LoadModuleMapResult Result =
+ loadModuleMapFileImpl(ModuleMapFile, IsSystem, Dir);
+ // Add Dir explicitly in case ModuleMapFile is in a subdirectory.
+ // E.g. Foo.framework/Modules/module.modulemap
+ // ^Dir ^ModuleMapFile
+ if (Result == LMM_NewlyLoaded)
+ DirectoryHasModuleMap[Dir] = true;
+ else if (Result == LMM_InvalidModuleMap)
+ DirectoryHasModuleMap[Dir] = false;
+ return Result;
+ }
+ return LMM_InvalidModuleMap;
+}
+
+void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
+ Modules.clear();
+
+ if (HSOpts->ImplicitModuleMaps) {
+ // Load module maps for each of the header search directories.
+ for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+ bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory();
+ if (SearchDirs[Idx].isFramework()) {
+ std::error_code EC;
+ SmallString<128> DirNative;
+ llvm::sys::path::native(SearchDirs[Idx].getFrameworkDir()->getName(),
+ DirNative);
+
+ // Search each of the ".framework" directories to load them as modules.
+ llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+ for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC),
+ DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ if (llvm::sys::path::extension(Dir->path()) != ".framework")
+ continue;
+
+ auto FrameworkDir =
+ FileMgr.getDirectory(Dir->path());
+ if (!FrameworkDir)
+ continue;
+
+ // Load this framework module.
+ loadFrameworkModule(llvm::sys::path::stem(Dir->path()), *FrameworkDir,
+ IsSystem);
+ }
+ continue;
+ }
+
+ // FIXME: Deal with header maps.
+ if (SearchDirs[Idx].isHeaderMap())
+ continue;
+
+ // Try to load a module map file for the search directory.
+ loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem,
+ /*IsFramework*/ false);
+
+ // Try to load module map files for immediate subdirectories of this
+ // search directory.
+ loadSubdirectoryModuleMaps(SearchDirs[Idx]);
+ }
+ }
+
+ // Populate the list of modules.
+ for (ModuleMap::module_iterator M = ModMap.module_begin(),
+ MEnd = ModMap.module_end();
+ M != MEnd; ++M) {
+ Modules.push_back(M->getValue());
+ }
+}
+
+void HeaderSearch::loadTopLevelSystemModules() {
+ if (!HSOpts->ImplicitModuleMaps)
+ return;
+
+ // Load module maps for each of the header search directories.
+ for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+ // We only care about normal header directories.
+ if (!SearchDirs[Idx].isNormalDir()) {
+ continue;
+ }
+
+ // Try to load a module map file for the search directory.
+ loadModuleMapFile(SearchDirs[Idx].getDir(),
+ SearchDirs[Idx].isSystemHeaderDirectory(),
+ SearchDirs[Idx].isFramework());
+ }
+}
+
+void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) {
+ assert(HSOpts->ImplicitModuleMaps &&
+ "Should not be loading subdirectory module maps");
+
+ if (SearchDir.haveSearchedAllModuleMaps())
+ return;
+
+ std::error_code EC;
+ SmallString<128> Dir = SearchDir.getDir()->getName();
+ FileMgr.makeAbsolutePath(Dir);
+ SmallString<128> DirNative;
+ llvm::sys::path::native(Dir, DirNative);
+ llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+ for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ bool IsFramework = llvm::sys::path::extension(Dir->path()) == ".framework";
+ if (IsFramework == SearchDir.isFramework())
+ loadModuleMapFile(Dir->path(), SearchDir.isSystemHeaderDirectory(),
+ SearchDir.isFramework());
+ }
+
+ SearchDir.setSearchedAllModuleMaps(true);
+}
+
+std::string HeaderSearch::suggestPathToFileForDiagnostics(
+ const FileEntry *File, llvm::StringRef MainFile, bool *IsSystem) {
+ // FIXME: We assume that the path name currently cached in the FileEntry is
+ // the most appropriate one for this analysis (and that it's spelled the
+ // same way as the corresponding header search path).
+ return suggestPathToFileForDiagnostics(File->getName(), /*WorkingDir=*/"",
+ MainFile, IsSystem);
+}
+
+std::string HeaderSearch::suggestPathToFileForDiagnostics(
+ llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile,
+ bool *IsSystem) {
+ using namespace llvm::sys;
+
+ unsigned BestPrefixLength = 0;
+ // Checks whether Dir and File shares a common prefix, if they do and that's
+ // the longest prefix we've seen so for it returns true and updates the
+ // BestPrefixLength accordingly.
+ auto CheckDir = [&](llvm::StringRef Dir) -> bool {
+ llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
+ if (!WorkingDir.empty() && !path::is_absolute(Dir))
+ fs::make_absolute(WorkingDir, DirPath);
+ path::remove_dots(DirPath, /*remove_dot_dot=*/true);
+ Dir = DirPath;
+ for (auto NI = path::begin(File), NE = path::end(File),
+ DI = path::begin(Dir), DE = path::end(Dir);
+ /*termination condition in loop*/; ++NI, ++DI) {
+ // '.' components in File are ignored.
+ while (NI != NE && *NI == ".")
+ ++NI;
+ if (NI == NE)
+ break;
+
+ // '.' components in Dir are ignored.
+ while (DI != DE && *DI == ".")
+ ++DI;
+ if (DI == DE) {
+ // Dir is a prefix of File, up to '.' components and choice of path
+ // separators.
+ unsigned PrefixLength = NI - path::begin(File);
+ if (PrefixLength > BestPrefixLength) {
+ BestPrefixLength = PrefixLength;
+ return true;
+ }
+ break;
+ }
+
+ // Consider all path separators equal.
+ if (NI->size() == 1 && DI->size() == 1 &&
+ path::is_separator(NI->front()) && path::is_separator(DI->front()))
+ continue;
+
+ if (*NI != *DI)
+ break;
+ }
+ return false;
+ };
+
+ for (unsigned I = 0; I != SearchDirs.size(); ++I) {
+ // FIXME: Support this search within frameworks and header maps.
+ if (!SearchDirs[I].isNormalDir())
+ continue;
+
+ StringRef Dir = SearchDirs[I].getDir()->getName();
+ if (CheckDir(Dir) && IsSystem)
+ *IsSystem = BestPrefixLength ? I >= SystemDirIdx : false;
+ }
+
+ // Try to shorten include path using TUs directory, if we couldn't find any
+ // suitable prefix in include search paths.
+ if (!BestPrefixLength && CheckDir(path::parent_path(MainFile)) && IsSystem)
+ *IsSystem = false;
+
+
+ return path::convert_to_slash(File.drop_front(BestPrefixLength));
+}
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
new file mode 100644
index 000000000000..17f5ab1e035d
--- /dev/null
+++ b/clang/lib/Lex/Lexer.cpp
@@ -0,0 +1,3951 @@
+//===- Lexer.cpp - C Language Family Lexer --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Lexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Lexer.h"
+#include "UnicodeCharSets.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MultipleIncludeOpt.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/NativeFormatting.h"
+#include "llvm/Support/UnicodeCharRanges.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Token Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
+bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+ if (isAnnotation())
+ return false;
+ if (IdentifierInfo *II = getIdentifierInfo())
+ return II->getObjCKeywordID() == objcKey;
+ return false;
+}
+
+/// getObjCKeywordID - Return the ObjC keyword kind.
+tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+ if (isAnnotation())
+ return tok::objc_not_keyword;
+ IdentifierInfo *specId = getIdentifierInfo();
+ return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
+}
+
+//===----------------------------------------------------------------------===//
+// Lexer Class Implementation
+//===----------------------------------------------------------------------===//
+
+void Lexer::anchor() {}
+
+void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
+ const char *BufEnd) {
+ BufferStart = BufStart;
+ BufferPtr = BufPtr;
+ BufferEnd = BufEnd;
+
+ assert(BufEnd[0] == 0 &&
+ "We assume that the input buffer has a null character at the end"
+ " to simplify lexing!");
+
+ // Check whether we have a BOM in the beginning of the buffer. If yes - act
+ // accordingly. Right now we support only UTF-8 with and without BOM, so, just
+ // skip the UTF-8 BOM if it's present.
+ if (BufferStart == BufferPtr) {
+ // Determine the size of the BOM.
+ StringRef Buf(BufferStart, BufferEnd - BufferStart);
+ size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
+ .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+ .Default(0);
+
+ // Skip the BOM.
+ BufferPtr += BOMLength;
+ }
+
+ Is_PragmaLexer = false;
+ CurrentConflictMarkerState = CMK_None;
+
+ // Start of the file is a start of line.
+ IsAtStartOfLine = true;
+ IsAtPhysicalStartOfLine = true;
+
+ HasLeadingSpace = false;
+ HasLeadingEmptyMacro = false;
+
+ // We are not after parsing a #.
+ ParsingPreprocessorDirective = false;
+
+ // We are not after parsing #include.
+ ParsingFilename = false;
+
+ // We are not in raw mode. Raw mode disables diagnostics and interpretation
+ // of tokens (e.g. identifiers, thus disabling macro expansion). It is used
+ // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
+ // or otherwise skipping over tokens.
+ LexingRawMode = false;
+
+ // Default to not keeping comments.
+ ExtendedTokenMode = 0;
+}
+
+/// Lexer constructor - Create a new lexer object for the specified buffer
+/// with the specified preprocessor managing the lexing process. This lexer
+/// assumes that the associated file buffer and Preprocessor objects will
+/// outlive it, so it doesn't take ownership of either of them.
+Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
+ : PreprocessorLexer(&PP, FID),
+ FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
+ LangOpts(PP.getLangOpts()) {
+ InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
+ InputFile->getBufferEnd());
+
+ resetExtendedTokenMode();
+}
+
+/// Lexer constructor - Create a new raw lexer object. This object is only
+/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
+/// range will outlive it, so it doesn't take ownership of it.
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
+ const char *BufStart, const char *BufPtr, const char *BufEnd)
+ : FileLoc(fileloc), LangOpts(langOpts) {
+ InitLexer(BufStart, BufPtr, BufEnd);
+
+ // We *are* in raw mode.
+ LexingRawMode = true;
+}
+
+/// Lexer constructor - Create a new raw lexer object. This object is only
+/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
+/// range will outlive it, so it doesn't take ownership of it.
+Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
+ const SourceManager &SM, const LangOptions &langOpts)
+ : Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
+ FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
+
+void Lexer::resetExtendedTokenMode() {
+ assert(PP && "Cannot reset token mode without a preprocessor");
+ if (LangOpts.TraditionalCPP)
+ SetKeepWhitespaceMode(true);
+ else
+ SetCommentRetentionState(PP->getCommentRetentionState());
+}
+
+/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
+/// _Pragma expansion. This has a variety of magic semantics that this method
+/// sets up. It returns a new'd Lexer that must be delete'd when done.
+///
+/// On entrance to this routine, TokStartLoc is a macro location which has a
+/// spelling loc that indicates the bytes to be lexed for the token and an
+/// expansion location that indicates where all lexed tokens should be
+/// "expanded from".
+///
+/// TODO: It would really be nice to make _Pragma just be a wrapper around a
+/// normal lexer that remaps tokens as they fly by. This would require making
+/// Preprocessor::Lex virtual. Given that, we could just dump in a magic lexer
+/// interface that could handle this stuff. This would pull GetMappedTokenLoc
+/// out of the critical path of the lexer!
+///
+Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd,
+ unsigned TokLen, Preprocessor &PP) {
+ SourceManager &SM = PP.getSourceManager();
+
+ // Create the lexer as if we were going to lex the file normally.
+ FileID SpellingFID = SM.getFileID(SpellingLoc);
+ const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID);
+ Lexer *L = new Lexer(SpellingFID, InputFile, PP);
+
+ // Now that the lexer is created, change the start/end locations so that we
+ // just lex the subsection of the file that we want. This is lexing from a
+ // scratch buffer.
+ const char *StrData = SM.getCharacterData(SpellingLoc);
+
+ L->BufferPtr = StrData;
+ L->BufferEnd = StrData+TokLen;
+ assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
+
+ // Set the SourceLocation with the remapping information. This ensures that
+ // GetMappedTokenLoc will remap the tokens as they are lexed.
+ L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
+ ExpansionLocStart,
+ ExpansionLocEnd, TokLen);
+
+ // Ensure that the lexer thinks it is inside a directive, so that end \n will
+ // return an EOD token.
+ L->ParsingPreprocessorDirective = true;
+
+ // This lexer really is for _Pragma.
+ L->Is_PragmaLexer = true;
+ return L;
+}
+
+bool Lexer::skipOver(unsigned NumBytes) {
+ IsAtPhysicalStartOfLine = true;
+ IsAtStartOfLine = true;
+ if ((BufferPtr + NumBytes) > BufferEnd)
+ return true;
+ BufferPtr += NumBytes;
+ return false;
+}
+
+template <typename T> static void StringifyImpl(T &Str, char Quote) {
+ typename T::size_type i = 0, e = Str.size();
+ while (i < e) {
+ if (Str[i] == '\\' || Str[i] == Quote) {
+ Str.insert(Str.begin() + i, '\\');
+ i += 2;
+ ++e;
+ } else if (Str[i] == '\n' || Str[i] == '\r') {
+ // Replace '\r\n' and '\n\r' to '\\' followed by 'n'.
+ if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') &&
+ Str[i] != Str[i + 1]) {
+ Str[i] = '\\';
+ Str[i + 1] = 'n';
+ } else {
+ // Replace '\n' and '\r' to '\\' followed by 'n'.
+ Str[i] = '\\';
+ Str.insert(Str.begin() + i + 1, 'n');
+ ++e;
+ }
+ i += 2;
+ } else
+ ++i;
+ }
+}
+
+std::string Lexer::Stringify(StringRef Str, bool Charify) {
+ std::string Result = Str;
+ char Quote = Charify ? '\'' : '"';
+ StringifyImpl(Result, Quote);
+ return Result;
+}
+
+void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); }
+
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// Slow case of getSpelling. Extract the characters comprising the
+/// spelling of this token from the provided input buffer.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+ const LangOptions &LangOpts, char *Spelling) {
+ assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+ size_t Length = 0;
+ const char *BufEnd = BufPtr + Tok.getLength();
+
+ if (tok::isStringLiteral(Tok.getKind())) {
+ // Munch the encoding-prefix and opening double-quote.
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+
+ if (Spelling[Length - 1] == '"')
+ break;
+ }
+
+ // Raw string literals need special handling; trigraph expansion and line
+ // splicing do not occur within their d-char-sequence nor within their
+ // r-char-sequence.
+ if (Length >= 2 &&
+ Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ // Search backwards from the end of the token to find the matching closing
+ // quote.
+ const char *RawEnd = BufEnd;
+ do --RawEnd; while (*RawEnd != '"');
+ size_t RawLength = RawEnd - BufPtr + 1;
+
+ // Everything between the quotes is included verbatim in the spelling.
+ memcpy(Spelling + Length, BufPtr, RawLength);
+ Length += RawLength;
+ BufPtr += RawLength;
+
+ // The rest of the token is lexed normally.
+ }
+ }
+
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+ }
+
+ assert(Length < Tok.getLength() &&
+ "NeedsCleaning flag set on token that didn't need cleaning!");
+ return Length;
+}
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
+StringRef Lexer::getSpelling(SourceLocation loc,
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SM,
+ const LangOptions &options,
+ bool *invalid) {
+ // Break down the source location.
+ std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
+
+ // Try to the load the file buffer.
+ bool invalidTemp = false;
+ StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+ if (invalidTemp) {
+ if (invalid) *invalid = true;
+ return {};
+ }
+
+ const char *tokenBegin = file.data() + locInfo.second;
+
+ // Lex from the start of the given location.
+ Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
+ file.begin(), tokenBegin, file.end());
+ Token token;
+ lexer.LexFromRawLexer(token);
+
+ unsigned length = token.getLength();
+
+ // Common case: no need for cleaning.
+ if (!token.needsCleaning())
+ return StringRef(tokenBegin, length);
+
+ // Hard case, we need to relex the characters into the string.
+ buffer.resize(length);
+ buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
+ return StringRef(buffer.data(), buffer.size());
+}
+
+/// getSpelling() - Return the 'spelling' of this token. The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding. In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
+std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
+ const LangOptions &LangOpts, bool *Invalid) {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ bool CharDataInvalid = false;
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ &CharDataInvalid);
+ if (Invalid)
+ *Invalid = CharDataInvalid;
+ if (CharDataInvalid)
+ return {};
+
+ // If this token contains nothing interesting, return it directly.
+ if (!Tok.needsCleaning())
+ return std::string(TokStart, TokStart + Tok.getLength());
+
+ std::string Result;
+ Result.resize(Tok.getLength());
+ Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
+ return Result;
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string. The caller is required
+/// to allocate enough space for the token, which is guaranteed to be at least
+/// Tok.getLength() bytes long. The actual length of the token is returned.
+///
+/// Note that this method may do two possible things: it may either fill in
+/// the buffer specified with characters, or it may *change the input pointer*
+/// to point to a constant buffer with the data already in it (avoiding a
+/// copy). The caller is not allowed to modify the returned buffer pointer
+/// if an internal buffer is returned.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
+ const SourceManager &SourceMgr,
+ const LangOptions &LangOpts, bool *Invalid) {
+ assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+ const char *TokStart = nullptr;
+ // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+ if (Tok.is(tok::raw_identifier))
+ TokStart = Tok.getRawIdentifier().data();
+ else if (!Tok.hasUCN()) {
+ if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ // Just return the string from the identifier table, which is very quick.
+ Buffer = II->getNameStart();
+ return II->getLength();
+ }
+ }
+
+ // NOTE: this can be checked even after testing for an IdentifierInfo.
+ if (Tok.isLiteral())
+ TokStart = Tok.getLiteralData();
+
+ if (!TokStart) {
+ // Compute the start of the token in the input lexer buffer.
+ bool CharDataInvalid = false;
+ TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+ if (Invalid)
+ *Invalid = CharDataInvalid;
+ if (CharDataInvalid) {
+ Buffer = "";
+ return 0;
+ }
+ }
+
+ // If this token contains nothing interesting, return it directly.
+ if (!Tok.needsCleaning()) {
+ Buffer = TokStart;
+ return Tok.getLength();
+ }
+
+ // Otherwise, hard case, relex the characters into the string.
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
+}
+
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file. If the token needs cleaning (e.g.
+/// includes a trigraph or an escaped newline) then this count includes bytes
+/// that are part of that.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ Token TheTok;
+ if (getRawToken(Loc, TheTok, SM, LangOpts))
+ return 0;
+ return TheTok.getLength();
+}
+
+/// Relex the token at the specified location.
+/// \returns true if there was a failure, false on success.
+bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool IgnoreWhiteSpace) {
+ // TODO: this could be special cased for common tokens like identifiers, ')',
+ // etc to make this faster, if it mattered. Just look at StrData[0] to handle
+ // all obviously single-char tokens. This could use
+ // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
+ // something.
+
+ // If this comes from a macro expansion, we really do want the macro name, not
+ // the token this macro expanded to.
+ Loc = SM.getExpansionLoc(Loc);
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return true;
+
+ const char *StrData = Buffer.data()+LocInfo.second;
+
+ if (!IgnoreWhiteSpace && isWhitespace(StrData[0]))
+ return true;
+
+ // Create a lexer starting at the beginning of this token.
+ Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts,
+ Buffer.begin(), StrData, Buffer.end());
+ TheLexer.SetCommentRetentionState(true);
+ TheLexer.LexFromRawLexer(Result);
+ return false;
+}
+
+/// Returns the pointer that points to the beginning of line that contains
+/// the given offset, or null if the offset if invalid.
+static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) {
+ const char *BufStart = Buffer.data();
+ if (Offset >= Buffer.size())
+ return nullptr;
+
+ const char *LexStart = BufStart + Offset;
+ for (; LexStart != BufStart; --LexStart) {
+ if (isVerticalWhitespace(LexStart[0]) &&
+ !Lexer::isNewLineEscaped(BufStart, LexStart)) {
+ // LexStart should point at first character of logical line.
+ ++LexStart;
+ break;
+ }
+ }
+ return LexStart;
+}
+
+static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ assert(Loc.isFileID());
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+ if (LocInfo.first.isInvalid())
+ return Loc;
+
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return Loc;
+
+ // Back up from the current location until we hit the beginning of a line
+ // (or the buffer). We'll relex from that point.
+ const char *StrData = Buffer.data() + LocInfo.second;
+ const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
+ if (!LexStart || LexStart == StrData)
+ return Loc;
+
+ // Create a lexer starting at the beginning of this token.
+ SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
+ Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
+ Buffer.end());
+ TheLexer.SetCommentRetentionState(true);
+
+ // Lex tokens until we find the token that contains the source location.
+ Token TheTok;
+ do {
+ TheLexer.LexFromRawLexer(TheTok);
+
+ if (TheLexer.getBufferLocation() > StrData) {
+ // Lexing this token has taken the lexer past the source location we're
+ // looking for. If the current token encompasses our source location,
+ // return the beginning of that token.
+ if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
+ return TheTok.getLocation();
+
+ // We ended up skipping over the source location entirely, which means
+ // that it points into whitespace. We're done here.
+ break;
+ }
+ } while (TheTok.getKind() != tok::eof);
+
+ // We've passed our source location; just return the original source location.
+ return Loc;
+}
+
+SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ if (Loc.isFileID())
+ return getBeginningOfFileToken(Loc, SM, LangOpts);
+
+ if (!SM.isMacroArgExpansion(Loc))
+ return Loc;
+
+ SourceLocation FileLoc = SM.getSpellingLoc(Loc);
+ SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
+ std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
+ std::pair<FileID, unsigned> BeginFileLocInfo =
+ SM.getDecomposedLoc(BeginFileLoc);
+ assert(FileLocInfo.first == BeginFileLocInfo.first &&
+ FileLocInfo.second >= BeginFileLocInfo.second);
+ return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
+}
+
+namespace {
+
+enum PreambleDirectiveKind {
+ PDK_Skipped,
+ PDK_Unknown
+};
+
+} // namespace
+
+PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
+ const LangOptions &LangOpts,
+ unsigned MaxLines) {
+ // Create a lexer starting at the beginning of the file. Note that we use a
+ // "fake" file source location at offset 1 so that the lexer will track our
+ // position within the file.
+ const unsigned StartOffset = 1;
+ SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);
+ Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
+ Buffer.end());
+ TheLexer.SetCommentRetentionState(true);
+
+ bool InPreprocessorDirective = false;
+ Token TheTok;
+ SourceLocation ActiveCommentLoc;
+
+ unsigned MaxLineOffset = 0;
+ if (MaxLines) {
+ const char *CurPtr = Buffer.begin();
+ unsigned CurLine = 0;
+ while (CurPtr != Buffer.end()) {
+ char ch = *CurPtr++;
+ if (ch == '\n') {
+ ++CurLine;
+ if (CurLine == MaxLines)
+ break;
+ }
+ }
+ if (CurPtr != Buffer.end())
+ MaxLineOffset = CurPtr - Buffer.begin();
+ }
+
+ do {
+ TheLexer.LexFromRawLexer(TheTok);
+
+ if (InPreprocessorDirective) {
+ // If we've hit the end of the file, we're done.
+ if (TheTok.getKind() == tok::eof) {
+ break;
+ }
+
+ // If we haven't hit the end of the preprocessor directive, skip this
+ // token.
+ if (!TheTok.isAtStartOfLine())
+ continue;
+
+ // We've passed the end of the preprocessor directive, and will look
+ // at this token again below.
+ InPreprocessorDirective = false;
+ }
+
+ // Keep track of the # of lines in the preamble.
+ if (TheTok.isAtStartOfLine()) {
+ unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;
+
+ // If we were asked to limit the number of lines in the preamble,
+ // and we're about to exceed that limit, we're done.
+ if (MaxLineOffset && TokOffset >= MaxLineOffset)
+ break;
+ }
+
+ // Comments are okay; skip over them.
+ if (TheTok.getKind() == tok::comment) {
+ if (ActiveCommentLoc.isInvalid())
+ ActiveCommentLoc = TheTok.getLocation();
+ continue;
+ }
+
+ if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
+ // This is the start of a preprocessor directive.
+ Token HashTok = TheTok;
+ InPreprocessorDirective = true;
+ ActiveCommentLoc = SourceLocation();
+
+ // Figure out which directive this is. Since we're lexing raw tokens,
+ // we don't have an identifier table available. Instead, just look at
+ // the raw identifier to recognize and categorize preprocessor directives.
+ TheLexer.LexFromRawLexer(TheTok);
+ if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
+ StringRef Keyword = TheTok.getRawIdentifier();
+ PreambleDirectiveKind PDK
+ = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
+ .Case("include", PDK_Skipped)
+ .Case("__include_macros", PDK_Skipped)
+ .Case("define", PDK_Skipped)
+ .Case("undef", PDK_Skipped)
+ .Case("line", PDK_Skipped)
+ .Case("error", PDK_Skipped)
+ .Case("pragma", PDK_Skipped)
+ .Case("import", PDK_Skipped)
+ .Case("include_next", PDK_Skipped)
+ .Case("warning", PDK_Skipped)
+ .Case("ident", PDK_Skipped)
+ .Case("sccs", PDK_Skipped)
+ .Case("assert", PDK_Skipped)
+ .Case("unassert", PDK_Skipped)
+ .Case("if", PDK_Skipped)
+ .Case("ifdef", PDK_Skipped)
+ .Case("ifndef", PDK_Skipped)
+ .Case("elif", PDK_Skipped)
+ .Case("else", PDK_Skipped)
+ .Case("endif", PDK_Skipped)
+ .Default(PDK_Unknown);
+
+ switch (PDK) {
+ case PDK_Skipped:
+ continue;
+
+ case PDK_Unknown:
+ // We don't know what this directive is; stop at the '#'.
+ break;
+ }
+ }
+
+ // We only end up here if we didn't recognize the preprocessor
+ // directive or it was one that can't occur in the preamble at this
+ // point. Roll back the current token to the location of the '#'.
+ TheTok = HashTok;
+ }
+
+ // We hit a token that we don't recognize as being in the
+ // "preprocessing only" part of the file, so we're no longer in
+ // the preamble.
+ break;
+ } while (true);
+
+ SourceLocation End;
+ if (ActiveCommentLoc.isValid())
+ End = ActiveCommentLoc; // don't truncate a decl comment.
+ else
+ End = TheTok.getLocation();
+
+ return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
+ TheTok.isAtStartOfLine());
+}
+
+unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ // Figure out how many physical characters away the specified expansion
+ // character is. This needs to take into consideration newlines and
+ // trigraphs.
+ bool Invalid = false;
+ const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);
+
+ // If they request the first char of the token, we're trivially done.
+ if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
+ return 0;
+
+ unsigned PhysOffset = 0;
+
+ // The usual case is that tokens don't contain anything interesting. Skip
+ // over the uninteresting characters. If a token only consists of simple
+ // chars, this method is extremely fast.
+ while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+ if (CharNo == 0)
+ return PhysOffset;
+ ++TokPtr;
+ --CharNo;
+ ++PhysOffset;
+ }
+
+ // If we have a character that may be a trigraph or escaped newline, use a
+ // lexer to parse it correctly.
+ for (; CharNo; --CharNo) {
+ unsigned Size;
+ Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
+ TokPtr += Size;
+ PhysOffset += Size;
+ }
+
+ // Final detail: if we end up on an escaped newline, we want to return the
+ // location of the actual byte of the token. For example foo\<newline>bar
+ // advanced by 3 should return the location of b, not of \\. One compounding
+ // detail of this is that the escape may be made by a trigraph.
+ if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+ PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+
+ return PhysOffset;
+}
+
+/// Computes the source location just past the end of the
+/// token at this source location.
+///
+/// This routine can be used to produce a source location that
+/// points just past the end of the token referenced by \p Loc, and
+/// is generally used when a diagnostic needs to point just after a
+/// token where it expected something different that it received. If
+/// the returned source location would not be meaningful (e.g., if
+/// it points into a macro), this routine returns an invalid
+/// source location.
+///
+/// \param Offset an offset from the end of the token, where the source
+/// location should refer to. The default offset (0) produces a source
+/// location pointing just past the end of the token; an offset of 1 produces
+/// a source location pointing to the last character in the token, etc.
+SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ if (Loc.isInvalid())
+ return {};
+
+ if (Loc.isMacroID()) {
+ if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
+ return {}; // Points inside the macro expansion.
+ }
+
+ unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+ if (Len > Offset)
+ Len = Len - Offset;
+ else
+ return Loc;
+
+ return Loc.getLocWithOffset(Len);
+}
+
+/// Returns true if the given MacroID location points at the first
+/// token of the macro expansion.
+bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ SourceLocation *MacroBegin) {
+ assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
+
+ SourceLocation expansionLoc;
+ if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
+ return false;
+
+ if (expansionLoc.isFileID()) {
+ // No other macro expansions, this is the first.
+ if (MacroBegin)
+ *MacroBegin = expansionLoc;
+ return true;
+ }
+
+ return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin);
+}
+
+/// Returns true if the given MacroID location points at the last
+/// token of the macro expansion.
+bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ SourceLocation *MacroEnd) {
+ assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
+
+ SourceLocation spellLoc = SM.getSpellingLoc(loc);
+ unsigned tokLen = MeasureTokenLength(spellLoc, SM, LangOpts);
+ if (tokLen == 0)
+ return false;
+
+ SourceLocation afterLoc = loc.getLocWithOffset(tokLen);
+ SourceLocation expansionLoc;
+ if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
+ return false;
+
+ if (expansionLoc.isFileID()) {
+ // No other macro expansions.
+ if (MacroEnd)
+ *MacroEnd = expansionLoc;
+ return true;
+ }
+
+ return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd);
+}
+
+static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ SourceLocation Begin = Range.getBegin();
+ SourceLocation End = Range.getEnd();
+ assert(Begin.isFileID() && End.isFileID());
+ if (Range.isTokenRange()) {
+ End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
+ if (End.isInvalid())
+ return {};
+ }
+
+ // Break down the source locations.
+ FileID FID;
+ unsigned BeginOffs;
+ std::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
+ if (FID.isInvalid())
+ return {};
+
+ unsigned EndOffs;
+ if (!SM.isInFileID(End, FID, &EndOffs) ||
+ BeginOffs > EndOffs)
+ return {};
+
+ return CharSourceRange::getCharRange(Begin, End);
+}
+
+CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ SourceLocation Begin = Range.getBegin();
+ SourceLocation End = Range.getEnd();
+ if (Begin.isInvalid() || End.isInvalid())
+ return {};
+
+ if (Begin.isFileID() && End.isFileID())
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+
+ if (Begin.isMacroID() && End.isFileID()) {
+ if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin))
+ return {};
+ Range.setBegin(Begin);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ if (Begin.isFileID() && End.isMacroID()) {
+ if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
+ &End)) ||
+ (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
+ &End)))
+ return {};
+ Range.setEnd(End);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ assert(Begin.isMacroID() && End.isMacroID());
+ SourceLocation MacroBegin, MacroEnd;
+ if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
+ ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
+ &MacroEnd)) ||
+ (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
+ &MacroEnd)))) {
+ Range.setBegin(MacroBegin);
+ Range.setEnd(MacroEnd);
+ return makeRangeFromFileLocs(Range, SM, LangOpts);
+ }
+
+ bool Invalid = false;
+ const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
+ &Invalid);
+ if (Invalid)
+ return {};
+
+ if (BeginEntry.getExpansion().isMacroArgExpansion()) {
+ const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
+ &Invalid);
+ if (Invalid)
+ return {};
+
+ if (EndEntry.getExpansion().isMacroArgExpansion() &&
+ BeginEntry.getExpansion().getExpansionLocStart() ==
+ EndEntry.getExpansion().getExpansionLocStart()) {
+ Range.setBegin(SM.getImmediateSpellingLoc(Begin));
+ Range.setEnd(SM.getImmediateSpellingLoc(End));
+ return makeFileCharRange(Range, SM, LangOpts);
+ }
+ }
+
+ return {};
+}
+
+StringRef Lexer::getSourceText(CharSourceRange Range,
+ const SourceManager &SM,
+ const LangOptions &LangOpts,
+ bool *Invalid) {
+ Range = makeFileCharRange(Range, SM, LangOpts);
+ if (Range.isInvalid()) {
+ if (Invalid) *Invalid = true;
+ return {};
+ }
+
+ // Break down the source location.
+ std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin());
+ if (beginInfo.first.isInvalid()) {
+ if (Invalid) *Invalid = true;
+ return {};
+ }
+
+ unsigned EndOffs;
+ if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
+ beginInfo.second > EndOffs) {
+ if (Invalid) *Invalid = true;
+ return {};
+ }
+
+ // Try to the load the file buffer.
+ bool invalidTemp = false;
+ StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
+ if (invalidTemp) {
+ if (Invalid) *Invalid = true;
+ return {};
+ }
+
+ if (Invalid) *Invalid = false;
+ return file.substr(beginInfo.second, EndOffs - beginInfo.second);
+}
+
+StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ assert(Loc.isMacroID() && "Only reasonable to call this on macros");
+
+ // Find the location of the immediate macro expansion.
+ while (true) {
+ FileID FID = SM.getFileID(Loc);
+ const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
+ const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
+ Loc = Expansion.getExpansionLocStart();
+ if (!Expansion.isMacroArgExpansion())
+ break;
+
+ // For macro arguments we need to check that the argument did not come
+ // from an inner macro, e.g: "MAC1( MAC2(foo) )"
+
+ // Loc points to the argument id of the macro definition, move to the
+ // macro expansion.
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
+ SourceLocation SpellLoc = Expansion.getSpellingLoc();
+ if (SpellLoc.isFileID())
+ break; // No inner macro.
+
+ // If spelling location resides in the same FileID as macro expansion
+ // location, it means there is no inner macro.
+ FileID MacroFID = SM.getFileID(Loc);
+ if (SM.isInFileID(SpellLoc, MacroFID))
+ break;
+
+ // Argument came from inner macro.
+ Loc = SpellLoc;
+ }
+
+ // Find the spelling location of the start of the non-argument expansion
+ // range. This is where the macro name was spelled in order to begin
+ // expanding this macro.
+ Loc = SM.getSpellingLoc(Loc);
+
+ // Dig out the buffer where the macro name was spelled and the extents of the
+ // name so that we can render it into the expansion note.
+ std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
+ unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+ StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
+ return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
+}
+
+StringRef Lexer::getImmediateMacroNameForDiagnostics(
+ SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
+ assert(Loc.isMacroID() && "Only reasonable to call this on macros");
+ // Walk past macro argument expansions.
+ while (SM.isMacroArgExpansion(Loc))
+ Loc = SM.getImmediateExpansionRange(Loc).getBegin();
+
+ // If the macro's spelling has no FileID, then it's actually a token paste
+ // or stringization (or similar) and not a macro at all.
+ if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))
+ return {};
+
+ // Find the spelling location of the start of the non-argument expansion
+ // range. This is where the macro name was spelled in order to begin
+ // expanding this macro.
+ Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).getBegin());
+
+ // Dig out the buffer where the macro name was spelled and the extents of the
+ // name so that we can render it into the expansion note.
+ std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
+ unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+ StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
+ return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
+}
+
+bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
+ return isIdentifierBody(c, LangOpts.DollarIdents);
+}
+
+bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {
+ assert(isVerticalWhitespace(Str[0]));
+ if (Str - 1 < BufferStart)
+ return false;
+
+ if ((Str[0] == '\n' && Str[-1] == '\r') ||
+ (Str[0] == '\r' && Str[-1] == '\n')) {
+ if (Str - 2 < BufferStart)
+ return false;
+ --Str;
+ }
+ --Str;
+
+ // Rewind to first non-space character:
+ while (Str > BufferStart && isHorizontalWhitespace(*Str))
+ --Str;
+
+ return *Str == '\\';
+}
+
+StringRef Lexer::getIndentationForLine(SourceLocation Loc,
+ const SourceManager &SM) {
+ if (Loc.isInvalid() || Loc.isMacroID())
+ return {};
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+ if (LocInfo.first.isInvalid())
+ return {};
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return {};
+ const char *Line = findBeginningOfLine(Buffer, LocInfo.second);
+ if (!Line)
+ return {};
+ StringRef Rest = Buffer.substr(Line - Buffer.data());
+ size_t NumWhitespaceChars = Rest.find_first_not_of(" \t");
+ return NumWhitespaceChars == StringRef::npos
+ ? ""
+ : Rest.take_front(NumWhitespaceChars);
+}
+
+//===----------------------------------------------------------------------===//
+// Diagnostics forwarding code.
+//===----------------------------------------------------------------------===//
+
+/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
+/// lexer buffer was all expanded at a single point, perform the mapping.
+/// This is currently only used for _Pragma implementation, so it is the slow
+/// path of the hot getSourceLocation method. Do not allow it to be inlined.
+static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
+ Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+ SourceLocation FileLoc,
+ unsigned CharNo, unsigned TokLen) {
+ assert(FileLoc.isMacroID() && "Must be a macro expansion");
+
+ // Otherwise, we're lexing "mapped tokens". This is used for things like
+ // _Pragma handling. Combine the expansion location of FileLoc with the
+ // spelling location.
+ SourceManager &SM = PP.getSourceManager();
+
+ // Create a new SLoc which is expanded from Expansion(FileLoc) but whose
+ // characters come from spelling(FileLoc)+Offset.
+ SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
+ SpellingLoc = SpellingLoc.getLocWithOffset(CharNo);
+
+ // Figure out the expansion loc range, which is the range covered by the
+ // original _Pragma(...) sequence.
+ CharSourceRange II = SM.getImmediateExpansionRange(FileLoc);
+
+ return SM.createExpansionLoc(SpellingLoc, II.getBegin(), II.getEnd(), TokLen);
+}
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file.
+SourceLocation Lexer::getSourceLocation(const char *Loc,
+ unsigned TokLen) const {
+ assert(Loc >= BufferStart && Loc <= BufferEnd &&
+ "Location out of range for this buffer!");
+
+ // In the normal case, we're just lexing from a simple file buffer, return
+ // the file id from FileLoc with the offset specified.
+ unsigned CharNo = Loc-BufferStart;
+ if (FileLoc.isFileID())
+ return FileLoc.getLocWithOffset(CharNo);
+
+ // Otherwise, this is the _Pragma lexer case, which pretends that all of the
+ // tokens are lexed from where the _Pragma was defined.
+ assert(PP && "This doesn't work on raw lexers");
+ return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
+}
+
+/// Diag - Forwarding function for diagnostics. This translate a source
+/// position in the current buffer into a SourceLocation object for rendering.
+DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
+ return PP->Diag(getSourceLocation(Loc), DiagID);
+}
+
+//===----------------------------------------------------------------------===//
+// Trigraph and Escaped Newline Handling Code.
+//===----------------------------------------------------------------------===//
+
+/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
+/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
+static char GetTrigraphCharForLetter(char Letter) {
+ switch (Letter) {
+ default: return 0;
+ case '=': return '#';
+ case ')': return ']';
+ case '(': return '[';
+ case '!': return '|';
+ case '\'': return '^';
+ case '>': return '}';
+ case '/': return '\\';
+ case '<': return '{';
+ case '-': return '~';
+ }
+}
+
+/// DecodeTrigraphChar - If the specified character is a legal trigraph when
+/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled,
+/// return the result character. Finally, emit a warning about trigraph use
+/// whether trigraphs are enabled or not.
+static char DecodeTrigraphChar(const char *CP, Lexer *L) {
+ char Res = GetTrigraphCharForLetter(*CP);
+ if (!Res || !L) return Res;
+
+ if (!L->getLangOpts().Trigraphs) {
+ if (!L->isLexingRawMode())
+ L->Diag(CP-2, diag::trigraph_ignored);
+ return 0;
+ }
+
+ if (!L->isLexingRawMode())
+ L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
+ return Res;
+}
+
+/// getEscapedNewLineSize - Return the size of the specified escaped newline,
+/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
+/// trigraph equivalent on entry to this function.
+unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
+ unsigned Size = 0;
+ while (isWhitespace(Ptr[Size])) {
+ ++Size;
+
+ if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
+ continue;
+
+ // If this is a \r\n or \n\r, skip the other half.
+ if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
+ Ptr[Size-1] != Ptr[Size])
+ ++Size;
+
+ return Size;
+ }
+
+ // Not an escaped newline, must be a \t or something else.
+ return 0;
+}
+
+/// SkipEscapedNewLines - If P points to an escaped newline (or a series of
+/// them), skip over them and return the first non-escaped-newline found,
+/// otherwise return P.
+const char *Lexer::SkipEscapedNewLines(const char *P) {
+ while (true) {
+ const char *AfterEscape;
+ if (*P == '\\') {
+ AfterEscape = P+1;
+ } else if (*P == '?') {
+ // If not a trigraph for escape, bail out.
+ if (P[1] != '?' || P[2] != '/')
+ return P;
+ // FIXME: Take LangOpts into account; the language might not
+ // support trigraphs.
+ AfterEscape = P+3;
+ } else {
+ return P;
+ }
+
+ unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
+ if (NewLineSize == 0) return P;
+ P = AfterEscape+NewLineSize;
+ }
+}
+
+Optional<Token> Lexer::findNextToken(SourceLocation Loc,
+ const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ if (Loc.isMacroID()) {
+ if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
+ return None;
+ }
+ Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
+
+ // Break down the source location.
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+
+ // Try to load the file buffer.
+ bool InvalidTemp = false;
+ StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
+ if (InvalidTemp)
+ return None;
+
+ const char *TokenBegin = File.data() + LocInfo.second;
+
+ // Lex from the start of the given location.
+ Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
+ TokenBegin, File.end());
+ // Find the token.
+ Token Tok;
+ lexer.LexFromRawLexer(Tok);
+ return Tok;
+}
+
+/// Checks that the given token is the first token that occurs after the
+/// given location (this excludes comments and whitespace). Returns the location
+/// immediately after the specified token. If the token is not found or the
+/// location is inside a macro, the returned source location will be invalid.
+SourceLocation Lexer::findLocationAfterToken(
+ SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,
+ const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) {
+ Optional<Token> Tok = findNextToken(Loc, SM, LangOpts);
+ if (!Tok || Tok->isNot(TKind))
+ return {};
+ SourceLocation TokenLoc = Tok->getLocation();
+
+ // Calculate how much whitespace needs to be skipped if any.
+ unsigned NumWhitespaceChars = 0;
+ if (SkipTrailingWhitespaceAndNewLine) {
+ const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength();
+ unsigned char C = *TokenEnd;
+ while (isHorizontalWhitespace(C)) {
+ C = *(++TokenEnd);
+ NumWhitespaceChars++;
+ }
+
+ // Skip \r, \n, \r\n, or \n\r
+ if (C == '\n' || C == '\r') {
+ char PrevC = C;
+ C = *(++TokenEnd);
+ NumWhitespaceChars++;
+ if ((C == '\n' || C == '\r') && C != PrevC)
+ NumWhitespaceChars++;
+ }
+ }
+
+ return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars);
+}
+
+/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
+/// get its size, and return it. This is tricky in several cases:
+/// 1. If currently at the start of a trigraph, we warn about the trigraph,
+/// then either return the trigraph (skipping 3 chars) or the '?',
+/// depending on whether trigraphs are enabled or not.
+/// 2. If this is an escaped newline (potentially with whitespace between
+/// the backslash and newline), implicitly skip the newline and return
+/// the char after it.
+///
+/// This handles the slow/uncommon case of the getCharAndSize method. Here we
+/// know that we can accumulate into Size, and that we have already incremented
+/// Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+ Token *Tok) {
+ // If we have a slash, look for an escaped newline.
+ if (Ptr[0] == '\\') {
+ ++Size;
+ ++Ptr;
+Slash:
+ // Common case, backslash-char where the char is not whitespace.
+ if (!isWhitespace(Ptr[0])) return '\\';
+
+ // See if we have optional whitespace characters between the slash and
+ // newline.
+ if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
+ // Remember that this token needs to be cleaned.
+ if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+ // Warn if there was whitespace between the backslash and newline.
+ if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
+ Diag(Ptr, diag::backslash_newline_space);
+
+ // Found backslash<whitespace><newline>. Parse the char after it.
+ Size += EscapedNewLineSize;
+ Ptr += EscapedNewLineSize;
+
+ // Use slow version to accumulate a correct size field.
+ return getCharAndSizeSlow(Ptr, Size, Tok);
+ }
+
+ // Otherwise, this is not an escaped newline, just return the slash.
+ return '\\';
+ }
+
+ // If this is a trigraph, process it.
+ if (Ptr[0] == '?' && Ptr[1] == '?') {
+ // If this is actually a legal trigraph (not something like "??x"), emit
+ // a trigraph warning. If so, and if trigraphs are enabled, return it.
+ if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : nullptr)) {
+ // Remember that this token needs to be cleaned.
+ if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+ Ptr += 3;
+ Size += 3;
+ if (C == '\\') goto Slash;
+ return C;
+ }
+ }
+
+ // If this is neither, return a single character.
+ ++Size;
+ return *Ptr;
+}
+
+/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
+/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
+/// and that we have already incremented Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+ const LangOptions &LangOpts) {
+ // If we have a slash, look for an escaped newline.
+ if (Ptr[0] == '\\') {
+ ++Size;
+ ++Ptr;
+Slash:
+ // Common case, backslash-char where the char is not whitespace.
+ if (!isWhitespace(Ptr[0])) return '\\';
+
+ // See if we have optional whitespace characters followed by a newline.
+ if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
+ // Found backslash<whitespace><newline>. Parse the char after it.
+ Size += EscapedNewLineSize;
+ Ptr += EscapedNewLineSize;
+
+ // Use slow version to accumulate a correct size field.
+ return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
+ }
+
+ // Otherwise, this is not an escaped newline, just return the slash.
+ return '\\';
+ }
+
+ // If this is a trigraph, process it.
+ if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+ // If this is actually a legal trigraph (not something like "??x"), return
+ // it.
+ if (char C = GetTrigraphCharForLetter(Ptr[2])) {
+ Ptr += 3;
+ Size += 3;
+ if (C == '\\') goto Slash;
+ return C;
+ }
+ }
+
+ // If this is neither, return a single character.
+ ++Size;
+ return *Ptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper methods for lexing.
+//===----------------------------------------------------------------------===//
+
+/// Routine that indiscriminately sets the offset into the source file.
+void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
+ BufferPtr = BufferStart + Offset;
+ if (BufferPtr > BufferEnd)
+ BufferPtr = BufferEnd;
+ // FIXME: What exactly does the StartOfLine bit mean? There are two
+ // possible meanings for the "start" of the line: the first token on the
+ // unexpanded line, or the first token on the expanded line.
+ IsAtStartOfLine = StartOfLine;
+ IsAtPhysicalStartOfLine = StartOfLine;
+}
+
+static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
+ if (LangOpts.AsmPreprocessor) {
+ return false;
+ } else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
+ C11AllowedIDCharRanges);
+ return C11AllowedIDChars.contains(C);
+ } else if (LangOpts.CPlusPlus) {
+ static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
+ CXX03AllowedIDCharRanges);
+ return CXX03AllowedIDChars.contains(C);
+ } else {
+ static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
+ C99AllowedIDCharRanges);
+ return C99AllowedIDChars.contains(C);
+ }
+}
+
+static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {
+ assert(isAllowedIDChar(C, LangOpts));
+ if (LangOpts.AsmPreprocessor) {
+ return false;
+ } else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
+ C11DisallowedInitialIDCharRanges);
+ return !C11DisallowedInitialIDChars.contains(C);
+ } else if (LangOpts.CPlusPlus) {
+ return true;
+ } else {
+ static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
+ C99DisallowedInitialIDCharRanges);
+ return !C99DisallowedInitialIDChars.contains(C);
+ }
+}
+
+static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,
+ const char *End) {
+ return CharSourceRange::getCharRange(L.getSourceLocation(Begin),
+ L.getSourceLocation(End));
+}
+
+static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
+ CharSourceRange Range, bool IsFirst) {
+ // Check C99 compatibility.
+ if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
+ enum {
+ CannotAppearInIdentifier = 0,
+ CannotStartIdentifier
+ };
+
+ static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
+ C99AllowedIDCharRanges);
+ static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
+ C99DisallowedInitialIDCharRanges);
+ if (!C99AllowedIDChars.contains(C)) {
+ Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+ << Range
+ << CannotAppearInIdentifier;
+ } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
+ Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
+ << Range
+ << CannotStartIdentifier;
+ }
+ }
+
+ // Check C++98 compatibility.
+ if (!Diags.isIgnored(diag::warn_cxx98_compat_unicode_id, Range.getBegin())) {
+ static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
+ CXX03AllowedIDCharRanges);
+ if (!CXX03AllowedIDChars.contains(C)) {
+ Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id)
+ << Range;
+ }
+ }
+}
+
+/// After encountering UTF-8 character C and interpreting it as an identifier
+/// character, check whether it's a homoglyph for a common non-identifier
+/// source character that is unlikely to be an intentional identifier
+/// character and warn if so.
+static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
+ CharSourceRange Range) {
+ // FIXME: Handle Unicode quotation marks (smart quotes, fullwidth quotes).
+ struct HomoglyphPair {
+ uint32_t Character;
+ char LooksLike;
+ bool operator<(HomoglyphPair R) const { return Character < R.Character; }
+ };
+ static constexpr HomoglyphPair SortedHomoglyphs[] = {
+ {U'\u00ad', 0}, // SOFT HYPHEN
+ {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
+ {U'\u037e', ';'}, // GREEK QUESTION MARK
+ {U'\u200b', 0}, // ZERO WIDTH SPACE
+ {U'\u200c', 0}, // ZERO WIDTH NON-JOINER
+ {U'\u200d', 0}, // ZERO WIDTH JOINER
+ {U'\u2060', 0}, // WORD JOINER
+ {U'\u2061', 0}, // FUNCTION APPLICATION
+ {U'\u2062', 0}, // INVISIBLE TIMES
+ {U'\u2063', 0}, // INVISIBLE SEPARATOR
+ {U'\u2064', 0}, // INVISIBLE PLUS
+ {U'\u2212', '-'}, // MINUS SIGN
+ {U'\u2215', '/'}, // DIVISION SLASH
+ {U'\u2216', '\\'}, // SET MINUS
+ {U'\u2217', '*'}, // ASTERISK OPERATOR
+ {U'\u2223', '|'}, // DIVIDES
+ {U'\u2227', '^'}, // LOGICAL AND
+ {U'\u2236', ':'}, // RATIO
+ {U'\u223c', '~'}, // TILDE OPERATOR
+ {U'\ua789', ':'}, // MODIFIER LETTER COLON
+ {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE
+ {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
+ {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
+ {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
+ {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
+ {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
+ {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
+ {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
+ {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
+ {U'\uff0b', '+'}, // FULLWIDTH ASTERISK
+ {U'\uff0c', ','}, // FULLWIDTH COMMA
+ {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
+ {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
+ {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
+ {U'\uff1a', ':'}, // FULLWIDTH COLON
+ {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
+ {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
+ {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
+ {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
+ {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
+ {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
+ {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
+ {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
+ {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
+ {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
+ {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
+ {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
+ {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
+ {U'\uff5e', '~'}, // FULLWIDTH TILDE
+ {0, 0}
+ };
+ auto Homoglyph =
+ std::lower_bound(std::begin(SortedHomoglyphs),
+ std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});
+ if (Homoglyph->Character == C) {
+ llvm::SmallString<5> CharBuf;
+ {
+ llvm::raw_svector_ostream CharOS(CharBuf);
+ llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
+ }
+ if (Homoglyph->LooksLike) {
+ const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
+ << Range << CharBuf << LooksLikeStr;
+ } else {
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
+ << Range << CharBuf;
+ }
+ }
+}
+
+bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
+ Token &Result) {
+ const char *UCNPtr = CurPtr + Size;
+ uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/nullptr);
+ if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
+ return false;
+
+ if (!isLexingRawMode())
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UCNPtr),
+ /*IsFirst=*/false);
+
+ Result.setFlag(Token::HasUCN);
+ if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
+ (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
+ CurPtr = UCNPtr;
+ else
+ while (CurPtr != UCNPtr)
+ (void)getAndAdvanceChar(CurPtr, Result);
+ return true;
+}
+
+bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
+ const char *UnicodePtr = CurPtr;
+ llvm::UTF32 CodePoint;
+ llvm::ConversionResult Result =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
+ (const llvm::UTF8 *)BufferEnd,
+ &CodePoint,
+ llvm::strictConversion);
+ if (Result != llvm::conversionOK ||
+ !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
+ return false;
+
+ if (!isLexingRawMode()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UnicodePtr),
+ /*IsFirst=*/false);
+ maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UnicodePtr));
+ }
+
+ CurPtr = UnicodePtr;
+ return true;
+}
+
+bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+ // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+ unsigned Size;
+ unsigned char C = *CurPtr++;
+ while (isIdentifierBody(C))
+ C = *CurPtr++;
+
+ --CurPtr; // Back up over the skipped character.
+
+ // Fast path, no $,\,? in identifier found. '\' might be an escaped newline
+ // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
+ //
+ // TODO: Could merge these checks into an InfoTable flag to make the
+ // comparison cheaper
+ if (isASCII(C) && C != '\\' && C != '?' &&
+ (C != '$' || !LangOpts.DollarIdents)) {
+FinishIdentifier:
+ const char *IdStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
+ Result.setRawIdentifierData(IdStart);
+
+ // If we are in raw mode, return this identifier raw. There is no need to
+ // look up identifier information or attempt to macro expand it.
+ if (LexingRawMode)
+ return true;
+
+ // Fill in Result.IdentifierInfo and update the token kind,
+ // looking up the identifier in the identifier table.
+ IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+ // Note that we have to call PP->LookUpIdentifierInfo() even for code
+ // completion, it writes IdentifierInfo into Result, and callers rely on it.
+
+ // If the completion point is at the end of an identifier, we want to treat
+ // the identifier as incomplete even if it resolves to a macro or a keyword.
+ // This allows e.g. 'class^' to complete to 'classifier'.
+ if (isCodeCompletionPoint(CurPtr)) {
+ // Return the code-completion token.
+ Result.setKind(tok::code_completion);
+ // Skip the code-completion char and all immediate identifier characters.
+ // This ensures we get consistent behavior when completing at any point in
+ // an identifier (i.e. at the start, in the middle, at the end). Note that
+ // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code
+ // simpler.
+ assert(*CurPtr == 0 && "Completion character must be 0");
+ ++CurPtr;
+ // Note that code completion token is not added as a separate character
+ // when the completion point is at the end of the buffer. Therefore, we need
+ // to check if the buffer has ended.
+ if (CurPtr < BufferEnd) {
+ while (isIdentifierBody(*CurPtr))
+ ++CurPtr;
+ }
+ BufferPtr = CurPtr;
+ return true;
+ }
+
+ // Finally, now that we know we have an identifier, pass this off to the
+ // preprocessor, which may macro expand it or something.
+ if (II->isHandleIdentifierCase())
+ return PP->HandleIdentifier(Result);
+
+ return true;
+ }
+
+ // Otherwise, $,\,? in identifier found. Enter slower path.
+
+ C = getCharAndSize(CurPtr, Size);
+ while (true) {
+ if (C == '$') {
+ // If we hit a $ and they are not supported in identifiers, we are done.
+ if (!LangOpts.DollarIdents) goto FinishIdentifier;
+
+ // Otherwise, emit a diagnostic and continue.
+ if (!isLexingRawMode())
+ Diag(CurPtr, diag::ext_dollar_in_identifier);
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
+ C = getCharAndSize(CurPtr, Size);
+ continue;
+ } else if (!isIdentifierBody(C)) {
+ goto FinishIdentifier;
+ }
+
+ // Otherwise, this character is good, consume it.
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+
+ C = getCharAndSize(CurPtr, Size);
+ while (isIdentifierBody(C)) {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ }
+ }
+}
+
+/// isHexaLiteral - Return true if Start points to a hex constant.
+/// in microsoft mode (where this is supposed to be several different tokens).
+bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
+ unsigned Size;
+ char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
+ if (C1 != '0')
+ return false;
+ char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
+ return (C2 == 'x' || C2 == 'X');
+}
+
+/// LexNumericConstant - Lex the remainder of a integer or floating point
+/// constant. From[-1] is the first character lexed. Return the end of the
+/// constant.
+bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ char PrevCh = 0;
+ while (isPreprocessingNumberBody(C)) {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ PrevCh = C;
+ C = getCharAndSize(CurPtr, Size);
+ }
+
+ // If we fell out, check for a sign, due to 1e+12. If we have one, continue.
+ if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
+ // If we are in Microsoft mode, don't continue if the constant is hex.
+ // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
+ if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+ }
+
+ // If we have a hex FP constant, continue.
+ if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) {
+ // Outside C99 and C++17, we accept hexadecimal floating point numbers as a
+ // not-quite-conforming extension. Only do so if this looks like it's
+ // actually meant to be a hexfloat, and not if it has a ud-suffix.
+ bool IsHexFloat = true;
+ if (!LangOpts.C99) {
+ if (!isHexaLiteral(BufferPtr, LangOpts))
+ IsHexFloat = false;
+ else if (!getLangOpts().CPlusPlus17 &&
+ std::find(BufferPtr, CurPtr, '_') != CurPtr)
+ IsHexFloat = false;
+ }
+ if (IsHexFloat)
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+ }
+
+ // If we have a digit separator, continue.
+ if (C == '\'' && getLangOpts().CPlusPlus14) {
+ unsigned NextSize;
+ char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());
+ if (isIdentifierBody(Next)) {
+ if (!isLexingRawMode())
+ Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ CurPtr = ConsumeChar(CurPtr, NextSize, Result);
+ return LexNumericConstant(Result, CurPtr);
+ }
+ }
+
+ // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue.
+ if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
+ return LexNumericConstant(Result, CurPtr);
+ if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
+ return LexNumericConstant(Result, CurPtr);
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+ Result.setLiteralData(TokStart);
+ return true;
+}
+
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11, or warn on a ud-suffix in C++98.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
+ bool IsStringLiteral) {
+ assert(getLangOpts().CPlusPlus);
+
+ // Maximally munch an identifier.
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ bool Consumed = false;
+
+ if (!isIdentifierHead(C)) {
+ if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
+ Consumed = true;
+ else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
+ Consumed = true;
+ else
+ return CurPtr;
+ }
+
+ if (!getLangOpts().CPlusPlus11) {
+ if (!isLexingRawMode())
+ Diag(CurPtr,
+ C == '_' ? diag::warn_cxx11_compat_user_defined_literal
+ : diag::warn_cxx11_compat_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ return CurPtr;
+ }
+
+ // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
+ // that does not start with an underscore is ill-formed. As a conforming
+ // extension, we treat all such suffixes as if they had whitespace before
+ // them. We assume a suffix beginning with a UCN or UTF-8 character is more
+ // likely to be a ud-suffix than a macro, however, and accept that.
+ if (!Consumed) {
+ bool IsUDSuffix = false;
+ if (C == '_')
+ IsUDSuffix = true;
+ else if (IsStringLiteral && getLangOpts().CPlusPlus14) {
+ // In C++1y, we need to look ahead a few characters to see if this is a
+ // valid suffix for a string literal or a numeric literal (this could be
+ // the 'operator""if' defining a numeric literal operator).
+ const unsigned MaxStandardSuffixLength = 3;
+ char Buffer[MaxStandardSuffixLength] = { C };
+ unsigned Consumed = Size;
+ unsigned Chars = 1;
+ while (true) {
+ unsigned NextSize;
+ char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize,
+ getLangOpts());
+ if (!isIdentifierBody(Next)) {
+ // End of suffix. Check whether this is on the whitelist.
+ const StringRef CompleteSuffix(Buffer, Chars);
+ IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(),
+ CompleteSuffix);
+ break;
+ }
+
+ if (Chars == MaxStandardSuffixLength)
+ // Too long: can't be a standard suffix.
+ break;
+
+ Buffer[Chars++] = Next;
+ Consumed += NextSize;
+ }
+ }
+
+ if (!IsUDSuffix) {
+ if (!isLexingRawMode())
+ Diag(CurPtr, getLangOpts().MSVCCompat
+ ? diag::ext_ms_reserved_user_defined_literal
+ : diag::ext_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ return CurPtr;
+ }
+
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ }
+
+ Result.setFlag(Token::HasUDSuffix);
+ while (true) {
+ C = getCharAndSize(CurPtr, Size);
+ if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); }
+ else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
+ else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
+ else break;
+ }
+
+ return CurPtr;
+}
+
+/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
+/// either " or L" or u8" or u" or U".
+bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
+ const char *AfterQuote = CurPtr;
+ // Does this string contain the \0 character?
+ const char *NulCharacter = nullptr;
+
+ if (!isLexingRawMode() &&
+ (Kind == tok::utf8_string_literal ||
+ Kind == tok::utf16_string_literal ||
+ Kind == tok::utf32_string_literal))
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
+
+ char C = getAndAdvanceChar(CurPtr, Result);
+ while (C != '"') {
+ // Skip escaped characters. Escaped newlines will already be processed by
+ // getAndAdvanceChar.
+ if (C == '\\')
+ C = getAndAdvanceChar(CurPtr, Result);
+
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
+ Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return true;
+ }
+
+ if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ if (ParsingFilename)
+ codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false);
+ else
+ PP->CodeCompleteNaturalLanguage();
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
+ cutOffLexing();
+ return true;
+ }
+
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr, true);
+
+ // If a nul character existed in the string, warn about it.
+ if (NulCharacter && !isLexingRawMode())
+ Diag(NulCharacter, diag::null_in_char_or_string) << 1;
+
+ // Update the location of the token as well as the BufferPtr instance var.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, Kind);
+ Result.setLiteralData(TokStart);
+ return true;
+}
+
+/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
+/// having lexed R", LR", u8R", uR", or UR".
+bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
+ // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
+ // Between the initial and final double quote characters of the raw string,
+ // any transformations performed in phases 1 and 2 (trigraphs,
+ // universal-character-names, and line splicing) are reverted.
+
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
+
+ unsigned PrefixLen = 0;
+
+ while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+ ++PrefixLen;
+
+ // If the last character was not a '(', then we didn't lex a valid delimiter.
+ if (CurPtr[PrefixLen] != '(') {
+ if (!isLexingRawMode()) {
+ const char *PrefixEnd = &CurPtr[PrefixLen];
+ if (PrefixLen == 16) {
+ Diag(PrefixEnd, diag::err_raw_delim_too_long);
+ } else {
+ Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
+ << StringRef(PrefixEnd, 1);
+ }
+ }
+
+ // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
+ // it's possible the '"' was intended to be part of the raw string, but
+ // there's not much we can do about that.
+ while (true) {
+ char C = *CurPtr++;
+
+ if (C == '"')
+ break;
+ if (C == 0 && CurPtr-1 == BufferEnd) {
+ --CurPtr;
+ break;
+ }
+ }
+
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+ }
+
+ // Save prefix and move CurPtr past it
+ const char *Prefix = CurPtr;
+ CurPtr += PrefixLen + 1; // skip over prefix and '('
+
+ while (true) {
+ char C = *CurPtr++;
+
+ if (C == ')') {
+ // Check for prefix match and closing quote.
+ if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
+ CurPtr += PrefixLen + 1; // skip over prefix and '"'
+ break;
+ }
+ } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::err_unterminated_raw_string)
+ << StringRef(Prefix, PrefixLen);
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return true;
+ }
+ }
+
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr, true);
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, Kind);
+ Result.setLiteralData(TokStart);
+ return true;
+}
+
+/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
+/// after having lexed the '<' character. This is used for #include filenames.
+bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+ // Does this string contain the \0 character?
+ const char *NulCharacter = nullptr;
+ const char *AfterLessPos = CurPtr;
+ char C = getAndAdvanceChar(CurPtr, Result);
+ while (C != '>') {
+ // Skip escaped characters. Escaped newlines will already be processed by
+ // getAndAdvanceChar.
+ if (C == '\\')
+ C = getAndAdvanceChar(CurPtr, Result);
+
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file.
+ // If the filename is unterminated, then it must just be a lone <
+ // character. Return this as such.
+ FormTokenWithChars(Result, AfterLessPos, tok::less);
+ return true;
+ }
+
+ if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr - 1)) {
+ codeCompleteIncludedFile(AfterLessPos, CurPtr - 1, /*IsAngled=*/true);
+ cutOffLexing();
+ FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
+ return true;
+ }
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ // If a nul character existed in the string, warn about it.
+ if (NulCharacter && !isLexingRawMode())
+ Diag(NulCharacter, diag::null_in_char_or_string) << 1;
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, tok::header_name);
+ Result.setLiteralData(TokStart);
+ return true;
+}
+
+void Lexer::codeCompleteIncludedFile(const char *PathStart,
+ const char *CompletionPoint,
+ bool IsAngled) {
+ // Completion only applies to the filename, after the last slash.
+ StringRef PartialPath(PathStart, CompletionPoint - PathStart);
+ auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ? "/\\" : "/");
+ StringRef Dir =
+ (Slash == StringRef::npos) ? "" : PartialPath.take_front(Slash);
+ const char *StartOfFilename =
+ (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
+ // Code completion filter range is the filename only, up to completion point.
+ PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get(
+ StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
+ // We should replace the characters up to the closing quote, if any.
+ while (CompletionPoint < BufferEnd) {
+ char Next = *(CompletionPoint + 1);
+ if (Next == 0 || Next == '\r' || Next == '\n')
+ break;
+ ++CompletionPoint;
+ if (Next == (IsAngled ? '>' : '"'))
+ break;
+ }
+ PP->setCodeCompletionTokenRange(
+ FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
+ FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
+ PP->CodeCompleteIncludedFile(Dir, IsAngled);
+}
+
+/// LexCharConstant - Lex the remainder of a character constant, after having
+/// lexed either ' or L' or u8' or u' or U'.
+bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
+ // Does this character contain the \0 character?
+ const char *NulCharacter = nullptr;
+
+ if (!isLexingRawMode()) {
+ if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
+ Diag(BufferPtr, getLangOpts().CPlusPlus
+ ? diag::warn_cxx98_compat_unicode_literal
+ : diag::warn_c99_compat_unicode_literal);
+ else if (Kind == tok::utf8_char_constant)
+ Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
+ }
+
+ char C = getAndAdvanceChar(CurPtr, Result);
+ if (C == '\'') {
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
+ Diag(BufferPtr, diag::ext_empty_character);
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+ }
+
+ while (C != '\'') {
+ // Skip escaped characters.
+ if (C == '\\')
+ C = getAndAdvanceChar(CurPtr, Result);
+
+ if (C == '\n' || C == '\r' || // Newline.
+ (C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
+ if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
+ Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return true;
+ }
+
+ if (C == 0) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ cutOffLexing();
+ return true;
+ }
+
+ NulCharacter = CurPtr-1;
+ }
+ C = getAndAdvanceChar(CurPtr, Result);
+ }
+
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getLangOpts().CPlusPlus)
+ CurPtr = LexUDSuffix(Result, CurPtr, false);
+
+ // If a nul character existed in the character, warn about it.
+ if (NulCharacter && !isLexingRawMode())
+ Diag(NulCharacter, diag::null_in_char_or_string) << 0;
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, Kind);
+ Result.setLiteralData(TokStart);
+ return true;
+}
+
+/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
+/// Update BufferPtr to point to the next non-whitespace character and return.
+///
+/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
+bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
+ bool &TokAtPhysicalStartOfLine) {
+ // Whitespace - Skip it, then return the token after the whitespace.
+ bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
+ unsigned char Char = *CurPtr;
+
+ // Skip consecutive spaces efficiently.
+ while (true) {
+ // Skip horizontal whitespace very aggressively.
+ while (isHorizontalWhitespace(Char))
+ Char = *++CurPtr;
+
+ // Otherwise if we have something other than whitespace, we're done.
+ if (!isVerticalWhitespace(Char))
+ break;
+
+ if (ParsingPreprocessorDirective) {
+ // End of preprocessor directive line, let LexTokenInternal handle this.
+ BufferPtr = CurPtr;
+ return false;
+ }
+
+ // OK, but handle newline.
+ SawNewline = true;
+ Char = *++CurPtr;
+ }
+
+ // If the client wants us to return whitespace, return it now.
+ if (isKeepWhitespaceMode()) {
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ if (SawNewline) {
+ IsAtStartOfLine = true;
+ IsAtPhysicalStartOfLine = true;
+ }
+ // FIXME: The next token will not have LeadingSpace set.
+ return true;
+ }
+
+ // If this isn't immediately after a newline, there is leading space.
+ char PrevChar = CurPtr[-1];
+ bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+ Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+ if (SawNewline) {
+ Result.setFlag(Token::StartOfLine);
+ TokAtPhysicalStartOfLine = true;
+ }
+
+ BufferPtr = CurPtr;
+ return false;
+}
+
+/// We have just read the // characters from input. Skip until we find the
+/// newline character that terminates the comment. Then update BufferPtr and
+/// return.
+///
+/// If we're in KeepCommentMode or any CommentHandler has inserted
+/// some tokens, this will store the first token and return true.
+bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
+ bool &TokAtPhysicalStartOfLine) {
+ // If Line comments aren't explicitly enabled for this language, emit an
+ // extension warning.
+ if (!LangOpts.LineComment && !isLexingRawMode()) {
+ Diag(BufferPtr, diag::ext_line_comment);
+
+ // Mark them enabled so we only emit one warning for this translation
+ // unit.
+ LangOpts.LineComment = true;
+ }
+
+ // Scan over the body of the comment. The common case, when scanning, is that
+ // the comment contains normal ascii characters with nothing interesting in
+ // them. As such, optimize for this case with the inner loop.
+ //
+ // This loop terminates with CurPtr pointing at the newline (or end of buffer)
+ // character that ends the line comment.
+ char C;
+ while (true) {
+ C = *CurPtr;
+ // Skip over characters in the fast loop.
+ while (C != 0 && // Potentially EOF.
+ C != '\n' && C != '\r') // Newline or DOS-style newline.
+ C = *++CurPtr;
+
+ const char *NextLine = CurPtr;
+ if (C != 0) {
+ // We found a newline, see if it's escaped.
+ const char *EscapePtr = CurPtr-1;
+ bool HasSpace = false;
+ while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace.
+ --EscapePtr;
+ HasSpace = true;
+ }
+
+ if (*EscapePtr == '\\')
+ // Escaped newline.
+ CurPtr = EscapePtr;
+ else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
+ EscapePtr[-2] == '?' && LangOpts.Trigraphs)
+ // Trigraph-escaped newline.
+ CurPtr = EscapePtr-2;
+ else
+ break; // This is a newline, we're done.
+
+ // If there was space between the backslash and newline, warn about it.
+ if (HasSpace && !isLexingRawMode())
+ Diag(EscapePtr, diag::backslash_newline_space);
+ }
+
+ // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
+ // properly decode the character. Read it in raw mode to avoid emitting
+ // diagnostics about things like trigraphs. If we see an escaped newline,
+ // we'll handle it below.
+ const char *OldPtr = CurPtr;
+ bool OldRawMode = isLexingRawMode();
+ LexingRawMode = true;
+ C = getAndAdvanceChar(CurPtr, Result);
+ LexingRawMode = OldRawMode;
+
+ // If we only read only one character, then no special handling is needed.
+ // We're done and can skip forward to the newline.
+ if (C != 0 && CurPtr == OldPtr+1) {
+ CurPtr = NextLine;
+ break;
+ }
+
+ // If we read multiple characters, and one of those characters was a \r or
+ // \n, then we had an escaped newline within the comment. Emit diagnostic
+ // unless the next line is also a // comment.
+ if (CurPtr != OldPtr + 1 && C != '/' &&
+ (CurPtr == BufferEnd + 1 || CurPtr[0] != '/')) {
+ for (; OldPtr != CurPtr; ++OldPtr)
+ if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
+ // Okay, we found a // comment that ends in a newline, if the next
+ // line is also a // comment, but has spaces, don't emit a diagnostic.
+ if (isWhitespace(C)) {
+ const char *ForwardPtr = CurPtr;
+ while (isWhitespace(*ForwardPtr)) // Skip whitespace.
+ ++ForwardPtr;
+ if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
+ break;
+ }
+
+ if (!isLexingRawMode())
+ Diag(OldPtr-1, diag::ext_multi_line_line_comment);
+ break;
+ }
+ }
+
+ if (C == '\r' || C == '\n' || CurPtr == BufferEnd + 1) {
+ --CurPtr;
+ break;
+ }
+
+ if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return false;
+ }
+ }
+
+ // Found but did not consume the newline. Notify comment handlers about the
+ // comment unless we're in a #if 0 block.
+ if (PP && !isLexingRawMode() &&
+ PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
+ getSourceLocation(CurPtr)))) {
+ BufferPtr = CurPtr;
+ return true; // A token has to be returned.
+ }
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (inKeepCommentMode())
+ return SaveLineComment(Result, CurPtr);
+
+ // If we are inside a preprocessor directive and we see the end of line,
+ // return immediately, so that the lexer can return this as an EOD token.
+ if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
+ BufferPtr = CurPtr;
+ return false;
+ }
+
+ // Otherwise, eat the \n character. We don't care if this is a \n\r or
+ // \r\n sequence. This is an efficiency hack (because we know the \n can't
+ // contribute to another token), it isn't needed for correctness. Note that
+ // this is ok even in KeepWhitespaceMode, because we would have returned the
+ /// comment above in that mode.
+ ++CurPtr;
+
+ // The next returned token is at the start of the line.
+ Result.setFlag(Token::StartOfLine);
+ TokAtPhysicalStartOfLine = true;
+ // No leading whitespace seen so far.
+ Result.clearFlag(Token::LeadingSpace);
+ BufferPtr = CurPtr;
+ return false;
+}
+
+/// If in save-comment mode, package up this Line comment in an appropriate
+/// way and return it.
+bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {
+ // If we're not in a preprocessor directive, just return the // comment
+ // directly.
+ FormTokenWithChars(Result, CurPtr, tok::comment);
+
+ if (!ParsingPreprocessorDirective || LexingRawMode)
+ return true;
+
+ // If this Line-style comment is in a macro definition, transmogrify it into
+ // a C-style block comment.
+ bool Invalid = false;
+ std::string Spelling = PP->getSpelling(Result, &Invalid);
+ if (Invalid)
+ return true;
+
+ assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
+ Spelling[1] = '*'; // Change prefix to "/*".
+ Spelling += "*/"; // add suffix.
+
+ Result.setKind(tok::comment);
+ PP->CreateString(Spelling, Result,
+ Result.getLocation(), Result.getLocation());
+ return true;
+}
+
+/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
+/// character (either \\n or \\r) is part of an escaped newline sequence. Issue
+/// a diagnostic if so. We know that the newline is inside of a block comment.
+static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
+ Lexer *L) {
+ assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+
+ // Back up off the newline.
+ --CurPtr;
+
+ // If this is a two-character newline sequence, skip the other character.
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+ // \n\n or \r\r -> not escaped newline.
+ if (CurPtr[0] == CurPtr[1])
+ return false;
+ // \n\r or \r\n -> skip the newline.
+ --CurPtr;
+ }
+
+ // If we have horizontal whitespace, skip over it. We allow whitespace
+ // between the slash and newline.
+ bool HasSpace = false;
+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+ --CurPtr;
+ HasSpace = true;
+ }
+
+ // If we have a slash, we know this is an escaped newline.
+ if (*CurPtr == '\\') {
+ if (CurPtr[-1] != '*') return false;
+ } else {
+ // It isn't a slash, is it the ?? / trigraph?
+ if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
+ CurPtr[-3] != '*')
+ return false;
+
+ // This is the trigraph ending the comment. Emit a stern warning!
+ CurPtr -= 2;
+
+ // If no trigraphs are enabled, warn that we ignored this trigraph and
+ // ignore this * character.
+ if (!L->getLangOpts().Trigraphs) {
+ if (!L->isLexingRawMode())
+ L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+ return false;
+ }
+ if (!L->isLexingRawMode())
+ L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+ }
+
+ // Warn about having an escaped newline between the */ characters.
+ if (!L->isLexingRawMode())
+ L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+
+ // If there was space between the backslash and newline, warn about it.
+ if (HasSpace && !L->isLexingRawMode())
+ L->Diag(CurPtr, diag::backslash_newline_space);
+
+ return true;
+}
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#elif __ALTIVEC__
+#include <altivec.h>
+#undef bool
+#endif
+
+/// We have just read from input the / and * characters that started a comment.
+/// Read until we find the * and / characters that terminate the comment.
+/// Note that we don't bother decoding trigraphs or escaped newlines in block
+/// comments, because they cannot cause the comment to end. The only thing
+/// that can happen is the comment could end with an escaped newline between
+/// the terminating * and /.
+///
+/// If we're in KeepCommentMode or any CommentHandler has inserted
+/// some tokens, this will store the first token and return true.
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
+ bool &TokAtPhysicalStartOfLine) {
+ // Scan one character past where we should, looking for a '/' character. Once
+ // we find it, check to see if it was preceded by a *. This common
+ // optimization helps people who like to put a lot of * characters in their
+ // comments.
+
+ // The first character we get with newlines and trigraphs skipped to handle
+ // the degenerate /*/ case below correctly if the * has an escaped newline
+ // after it.
+ unsigned CharSize;
+ unsigned char C = getCharAndSize(CurPtr, CharSize);
+ CurPtr += CharSize;
+ if (C == 0 && CurPtr == BufferEnd+1) {
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
+ --CurPtr;
+
+ // KeepWhitespaceMode should return this broken comment as a token. Since
+ // it isn't a well formed comment, just return it as an 'unknown' token.
+ if (isKeepWhitespaceMode()) {
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+ }
+
+ BufferPtr = CurPtr;
+ return false;
+ }
+
+ // Check to see if the first character after the '/*' is another /. If so,
+ // then this slash does not end the block comment, it is part of it.
+ if (C == '/')
+ C = *CurPtr++;
+
+ while (true) {
+ // Skip over all non-interesting characters until we find end of buffer or a
+ // (probably ending) '/' character.
+ if (CurPtr + 24 < BufferEnd &&
+ // If there is a code-completion point avoid the fast scan because it
+ // doesn't check for '\0'.
+ !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {
+ // While not aligned to a 16-byte boundary.
+ while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
+ C = *CurPtr++;
+
+ if (C == '/') goto FoundSlash;
+
+#ifdef __SSE2__
+ __m128i Slashes = _mm_set1_epi8('/');
+ while (CurPtr+16 <= BufferEnd) {
+ int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
+ Slashes));
+ if (cmp != 0) {
+ // Adjust the pointer to point directly after the first slash. It's
+ // not necessary to set C here, it will be overwritten at the end of
+ // the outer loop.
+ CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
+ goto FoundSlash;
+ }
+ CurPtr += 16;
+ }
+#elif __ALTIVEC__
+ __vector unsigned char Slashes = {
+ '/', '/', '/', '/', '/', '/', '/', '/',
+ '/', '/', '/', '/', '/', '/', '/', '/'
+ };
+ while (CurPtr+16 <= BufferEnd &&
+ !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes))
+ CurPtr += 16;
+#else
+ // Scan for '/' quickly. Many block comments are very large.
+ while (CurPtr[0] != '/' &&
+ CurPtr[1] != '/' &&
+ CurPtr[2] != '/' &&
+ CurPtr[3] != '/' &&
+ CurPtr+4 < BufferEnd) {
+ CurPtr += 4;
+ }
+#endif
+
+ // It has to be one of the bytes scanned, increment to it and read one.
+ C = *CurPtr++;
+ }
+
+ // Loop to scan the remainder.
+ while (C != '/' && C != '\0')
+ C = *CurPtr++;
+
+ if (C == '/') {
+ FoundSlash:
+ if (CurPtr[-2] == '*') // We found the final */. We're done!
+ break;
+
+ if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
+ if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
+ // We found the final */, though it had an escaped newline between the
+ // * and /. We're done!
+ break;
+ }
+ }
+ if (CurPtr[0] == '*' && CurPtr[1] != '/') {
+ // If this is a /* inside of the comment, emit a warning. Don't do this
+ // if this is a /*/, which will end the comment. This misses cases with
+ // embedded escaped newlines, but oh well.
+ if (!isLexingRawMode())
+ Diag(CurPtr-1, diag::warn_nested_block_comment);
+ }
+ } else if (C == 0 && CurPtr == BufferEnd+1) {
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
+ // Note: the user probably forgot a */. We could continue immediately
+ // after the /*, but this would involve lexing a lot of what really is the
+ // comment, which surely would confuse the parser.
+ --CurPtr;
+
+ // KeepWhitespaceMode should return this broken comment as a token. Since
+ // it isn't a well formed comment, just return it as an 'unknown' token.
+ if (isKeepWhitespaceMode()) {
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+ }
+
+ BufferPtr = CurPtr;
+ return false;
+ } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return false;
+ }
+
+ C = *CurPtr++;
+ }
+
+ // Notify comment handlers about the comment unless we're in a #if 0 block.
+ if (PP && !isLexingRawMode() &&
+ PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
+ getSourceLocation(CurPtr)))) {
+ BufferPtr = CurPtr;
+ return true; // A token has to be returned.
+ }
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (inKeepCommentMode()) {
+ FormTokenWithChars(Result, CurPtr, tok::comment);
+ return true;
+ }
+
+ // It is common for the tokens immediately after a /**/ comment to be
+ // whitespace. Instead of going through the big switch, handle it
+ // efficiently now. This is safe even in KeepWhitespaceMode because we would
+ // have already returned above with the comment as a token.
+ if (isHorizontalWhitespace(*CurPtr)) {
+ SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
+ return false;
+ }
+
+ // Otherwise, just return so that the next character will be lexed as a token.
+ BufferPtr = CurPtr;
+ Result.setFlag(Token::LeadingSpace);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Primary Lexing Entry Points
+//===----------------------------------------------------------------------===//
+
+/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
+/// uninterpreted string. This switches the lexer out of directive mode.
+void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
+ assert(ParsingPreprocessorDirective && ParsingFilename == false &&
+ "Must be in a preprocessing directive!");
+ Token Tmp;
+
+ // CurPtr - Cache BufferPtr in an automatic variable.
+ const char *CurPtr = BufferPtr;
+ while (true) {
+ char Char = getAndAdvanceChar(CurPtr, Tmp);
+ switch (Char) {
+ default:
+ if (Result)
+ Result->push_back(Char);
+ break;
+ case 0: // Null.
+ // Found end of file?
+ if (CurPtr-1 != BufferEnd) {
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ PP->CodeCompleteNaturalLanguage();
+ cutOffLexing();
+ return;
+ }
+
+ // Nope, normal character, continue.
+ if (Result)
+ Result->push_back(Char);
+ break;
+ }
+ // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+ case '\r':
+ case '\n':
+ // Okay, we found the end of the line. First, back up past the \0, \r, \n.
+ assert(CurPtr[-1] == Char && "Trigraphs for newline?");
+ BufferPtr = CurPtr-1;
+
+ // Next, lex the character, which should handle the EOD transition.
+ Lex(Tmp);
+ if (Tmp.is(tok::code_completion)) {
+ if (PP)
+ PP->CodeCompleteNaturalLanguage();
+ Lex(Tmp);
+ }
+ assert(Tmp.is(tok::eod) && "Unexpected token!");
+
+ // Finally, we're done;
+ return;
+ }
+ }
+}
+
+/// LexEndOfFile - CurPtr points to the end of this file. Handle this
+/// condition, reporting diagnostics and handling other edge cases as required.
+/// This returns true if Result contains a token, false if PP.Lex should be
+/// called again.
+bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
+ // If we hit the end of the file while parsing a preprocessor directive,
+ // end the preprocessor directive first. The next token returned will
+ // then be the end of file.
+ if (ParsingPreprocessorDirective) {
+ // Done parsing the "line".
+ ParsingPreprocessorDirective = false;
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr, tok::eod);
+
+ // Restore comment saving mode, in case it was disabled for directive.
+ if (PP)
+ resetExtendedTokenMode();
+ return true; // Have a token.
+ }
+
+ // If we are in raw mode, return this event as an EOF token. Let the caller
+ // that put us in raw mode handle the event.
+ if (isLexingRawMode()) {
+ Result.startToken();
+ BufferPtr = BufferEnd;
+ FormTokenWithChars(Result, BufferEnd, tok::eof);
+ return true;
+ }
+
+ if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
+ PP->setRecordedPreambleConditionalStack(ConditionalStack);
+ ConditionalStack.clear();
+ }
+
+ // Issue diagnostics for unterminated #if and missing newline.
+
+ // If we are in a #if directive, emit an error.
+ while (!ConditionalStack.empty()) {
+ if (PP->getCodeCompletionFileLoc() != FileLoc)
+ PP->Diag(ConditionalStack.back().IfLoc,
+ diag::err_pp_unterminated_conditional);
+ ConditionalStack.pop_back();
+ }
+
+ // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
+ // a pedwarn.
+ if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) {
+ DiagnosticsEngine &Diags = PP->getDiagnostics();
+ SourceLocation EndLoc = getSourceLocation(BufferEnd);
+ unsigned DiagID;
+
+ if (LangOpts.CPlusPlus11) {
+ // C++11 [lex.phases] 2.2 p2
+ // Prefer the C++98 pedantic compatibility warning over the generic,
+ // non-extension, user-requested "missing newline at EOF" warning.
+ if (!Diags.isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
+ DiagID = diag::warn_cxx98_compat_no_newline_eof;
+ } else {
+ DiagID = diag::warn_no_newline_eof;
+ }
+ } else {
+ DiagID = diag::ext_no_newline_eof;
+ }
+
+ Diag(BufferEnd, DiagID)
+ << FixItHint::CreateInsertion(EndLoc, "\n");
+ }
+
+ BufferPtr = CurPtr;
+
+ // Finally, let the preprocessor handle this.
+ return PP->HandleEndOfFile(Result, isPragmaLexer());
+}
+
+/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
+/// the specified lexer will return a tok::l_paren token, 0 if it is something
+/// else and 2 if there are no more tokens in the buffer controlled by the
+/// lexer.
+unsigned Lexer::isNextPPTokenLParen() {
+ assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
+
+ // Switch to 'skipping' mode. This will ensure that we can lex a token
+ // without emitting diagnostics, disables macro expansion, and will cause EOF
+ // to return an EOF token instead of popping the include stack.
+ LexingRawMode = true;
+
+ // Save state that can be changed while lexing so that we can restore it.
+ const char *TmpBufferPtr = BufferPtr;
+ bool inPPDirectiveMode = ParsingPreprocessorDirective;
+ bool atStartOfLine = IsAtStartOfLine;
+ bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+ bool leadingSpace = HasLeadingSpace;
+
+ Token Tok;
+ Lex(Tok);
+
+ // Restore state that may have changed.
+ BufferPtr = TmpBufferPtr;
+ ParsingPreprocessorDirective = inPPDirectiveMode;
+ HasLeadingSpace = leadingSpace;
+ IsAtStartOfLine = atStartOfLine;
+ IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
+
+ // Restore the lexer back to non-skipping mode.
+ LexingRawMode = false;
+
+ if (Tok.is(tok::eof))
+ return 2;
+ return Tok.is(tok::l_paren);
+}
+
+/// Find the end of a version control conflict marker.
+static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
+ ConflictMarkerKind CMK) {
+ const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";
+ size_t TermLen = CMK == CMK_Perforce ? 5 : 7;
+ auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
+ size_t Pos = RestOfBuffer.find(Terminator);
+ while (Pos != StringRef::npos) {
+ // Must occur at start of line.
+ if (Pos == 0 ||
+ (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) {
+ RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
+ Pos = RestOfBuffer.find(Terminator);
+ continue;
+ }
+ return RestOfBuffer.data()+Pos;
+ }
+ return nullptr;
+}
+
+/// IsStartOfConflictMarker - If the specified pointer is the start of a version
+/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
+/// and recover nicely. This returns true if it is a conflict marker and false
+/// if not.
+bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
+ // Only a conflict marker if it starts at the beginning of a line.
+ if (CurPtr != BufferStart &&
+ CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ return false;
+
+ // Check to see if we have <<<<<<< or >>>>.
+ if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
+ !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
+ return false;
+
+ // If we have a situation where we don't care about conflict markers, ignore
+ // it.
+ if (CurrentConflictMarkerState || isLexingRawMode())
+ return false;
+
+ ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;
+
+ // Check to see if there is an ending marker somewhere in the buffer at the
+ // start of a line to terminate this conflict marker.
+ if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {
+ // We found a match. We are really in a conflict marker.
+ // Diagnose this, and ignore to the end of line.
+ Diag(CurPtr, diag::err_conflict_marker);
+ CurrentConflictMarkerState = Kind;
+
+ // Skip ahead to the end of line. We know this exists because the
+ // end-of-conflict marker starts with \r or \n.
+ while (*CurPtr != '\r' && *CurPtr != '\n') {
+ assert(CurPtr != BufferEnd && "Didn't find end of line");
+ ++CurPtr;
+ }
+ BufferPtr = CurPtr;
+ return true;
+ }
+
+ // No end of conflict marker found.
+ return false;
+}
+
+/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
+/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
+/// is the end of a conflict marker. Handle it by ignoring up until the end of
+/// the line. This returns true if it is a conflict marker and false if not.
+bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
+ // Only a conflict marker if it starts at the beginning of a line.
+ if (CurPtr != BufferStart &&
+ CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ return false;
+
+ // If we have a situation where we don't care about conflict markers, ignore
+ // it.
+ if (!CurrentConflictMarkerState || isLexingRawMode())
+ return false;
+
+ // Check to see if we have the marker (4 characters in a row).
+ for (unsigned i = 1; i != 4; ++i)
+ if (CurPtr[i] != CurPtr[0])
+ return false;
+
+ // If we do have it, search for the end of the conflict marker. This could
+ // fail if it got skipped with a '#if 0' or something. Note that CurPtr might
+ // be the end of conflict marker.
+ if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
+ CurrentConflictMarkerState)) {
+ CurPtr = End;
+
+ // Skip ahead to the end of line.
+ while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
+ ++CurPtr;
+
+ BufferPtr = CurPtr;
+
+ // No longer in the conflict marker.
+ CurrentConflictMarkerState = CMK_None;
+ return true;
+ }
+
+ return false;
+}
+
+static const char *findPlaceholderEnd(const char *CurPtr,
+ const char *BufferEnd) {
+ if (CurPtr == BufferEnd)
+ return nullptr;
+ BufferEnd -= 1; // Scan until the second last character.
+ for (; CurPtr != BufferEnd; ++CurPtr) {
+ if (CurPtr[0] == '#' && CurPtr[1] == '>')
+ return CurPtr + 2;
+ }
+ return nullptr;
+}
+
+bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) {
+ assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!");
+ if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode)
+ return false;
+ const char *End = findPlaceholderEnd(CurPtr + 1, BufferEnd);
+ if (!End)
+ return false;
+ const char *Start = CurPtr - 1;
+ if (!LangOpts.AllowEditorPlaceholders)
+ Diag(Start, diag::err_placeholder_in_source);
+ Result.startToken();
+ FormTokenWithChars(Result, End, tok::raw_identifier);
+ Result.setRawIdentifierData(Start);
+ PP->LookUpIdentifierInfo(Result);
+ Result.setFlag(Token::IsEditorPlaceholder);
+ BufferPtr = End;
+ return true;
+}
+
+bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
+ if (PP && PP->isCodeCompletionEnabled()) {
+ SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
+ return Loc == PP->getCodeCompletionLoc();
+ }
+
+ return false;
+}
+
+uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
+ Token *Result) {
+ unsigned CharSize;
+ char Kind = getCharAndSize(StartPtr, CharSize);
+
+ unsigned NumHexDigits;
+ if (Kind == 'u')
+ NumHexDigits = 4;
+ else if (Kind == 'U')
+ NumHexDigits = 8;
+ else
+ return 0;
+
+ if (!LangOpts.CPlusPlus && !LangOpts.C99) {
+ if (Result && !isLexingRawMode())
+ Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
+ return 0;
+ }
+
+ const char *CurPtr = StartPtr + CharSize;
+ const char *KindLoc = &CurPtr[-1];
+
+ uint32_t CodePoint = 0;
+ for (unsigned i = 0; i < NumHexDigits; ++i) {
+ char C = getCharAndSize(CurPtr, CharSize);
+
+ unsigned Value = llvm::hexDigitValue(C);
+ if (Value == -1U) {
+ if (Result && !isLexingRawMode()) {
+ if (i == 0) {
+ Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
+ << StringRef(KindLoc, 1);
+ } else {
+ Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+
+ // If the user wrote \U1234, suggest a fixit to \u.
+ if (i == 4 && NumHexDigits == 8) {
+ CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+ Diag(KindLoc, diag::note_ucn_four_not_eight)
+ << FixItHint::CreateReplacement(URange, "u");
+ }
+ }
+ }
+
+ return 0;
+ }
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+
+ CurPtr += CharSize;
+ }
+
+ if (Result) {
+ Result->setFlag(Token::HasUCN);
+ if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+ StartPtr = CurPtr;
+ else
+ while (StartPtr != CurPtr)
+ (void)getAndAdvanceChar(StartPtr, *Result);
+ } else {
+ StartPtr = CurPtr;
+ }
+
+ // Don't apply C family restrictions to UCNs in assembly mode
+ if (LangOpts.AsmPreprocessor)
+ return CodePoint;
+
+ // C99 6.4.3p2: A universal character name shall not specify a character whose
+ // short identifier is less than 00A0 other than 0024 ($), 0040 (@), or
+ // 0060 (`), nor one in the range D800 through DFFF inclusive.)
+ // C++11 [lex.charset]p2: If the hexadecimal value for a
+ // universal-character-name corresponds to a surrogate code point (in the
+ // range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally,
+ // if the hexadecimal value for a universal-character-name outside the
+ // c-char-sequence, s-char-sequence, or r-char-sequence of a character or
+ // string literal corresponds to a control character (in either of the
+ // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the
+ // basic source character set, the program is ill-formed.
+ if (CodePoint < 0xA0) {
+ if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
+ return CodePoint;
+
+ // We don't use isLexingRawMode() here because we need to warn about bad
+ // UCNs even when skipping preprocessing tokens in a #if block.
+ if (Result && PP) {
+ if (CodePoint < 0x20 || CodePoint >= 0x7F)
+ Diag(BufferPtr, diag::err_ucn_control_character);
+ else {
+ char C = static_cast<char>(CodePoint);
+ Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
+ }
+ }
+
+ return 0;
+ } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
+ // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
+ // We don't use isLexingRawMode() here because we need to diagnose bad
+ // UCNs even when skipping preprocessing tokens in a #if block.
+ if (Result && PP) {
+ if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
+ Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
+ else
+ Diag(BufferPtr, diag::err_ucn_escape_invalid);
+ }
+ return 0;
+ }
+
+ return CodePoint;
+}
+
+bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
+ const char *CurPtr) {
+ static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
+ UnicodeWhitespaceCharRanges);
+ if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+ UnicodeWhitespaceChars.contains(C)) {
+ Diag(BufferPtr, diag::ext_unicode_whitespace)
+ << makeCharRange(*this, BufferPtr, CurPtr);
+
+ Result.setFlag(Token::LeadingSpace);
+ return true;
+ }
+ return false;
+}
+
+bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+ if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr),
+ /*IsFirst=*/true);
+ maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr));
+ }
+
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+ }
+
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput() &&
+ !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) {
+ // Non-ASCII characters tend to creep into source code unintentionally.
+ // Instead of letting the parser complain about the unknown token,
+ // just drop the character.
+ // Note that we can /only/ do this when the non-ASCII character is actually
+ // spelled as Unicode, not written as a UCN. The standard requires that
+ // we not throw away any possible preprocessor tokens, but there's a
+ // loophole in the mapping of Unicode characters to basic character set
+ // characters that allows us to map these particular characters to, say,
+ // whitespace.
+ Diag(BufferPtr, diag::err_non_ascii)
+ << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
+
+ BufferPtr = CurPtr;
+ return false;
+ }
+
+ // Otherwise, we have an explicit UCN or a character that's unlikely to show
+ // up by accident.
+ MIOpt.ReadToken();
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+}
+
+void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+ IsAtStartOfLine = Result.isAtStartOfLine();
+ HasLeadingSpace = Result.hasLeadingSpace();
+ HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro();
+ // Note that this doesn't affect IsAtPhysicalStartOfLine.
+}
+
+bool Lexer::Lex(Token &Result) {
+ // Start a new token.
+ Result.startToken();
+
+ // Set up misc whitespace flags for LexTokenInternal.
+ if (IsAtStartOfLine) {
+ Result.setFlag(Token::StartOfLine);
+ IsAtStartOfLine = false;
+ }
+
+ if (HasLeadingSpace) {
+ Result.setFlag(Token::LeadingSpace);
+ HasLeadingSpace = false;
+ }
+
+ if (HasLeadingEmptyMacro) {
+ Result.setFlag(Token::LeadingEmptyMacro);
+ HasLeadingEmptyMacro = false;
+ }
+
+ bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+ IsAtPhysicalStartOfLine = false;
+ bool isRawLex = isLexingRawMode();
+ (void) isRawLex;
+ bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
+ // (After the LexTokenInternal call, the lexer might be destroyed.)
+ assert((returnedToken || !isRawLex) && "Raw lex must succeed");
+ return returnedToken;
+}
+
+/// LexTokenInternal - This implements a simple C family lexer. It is an
+/// extremely performance critical piece of code. This assumes that the buffer
+/// has a null character at the end of the file. This returns a preprocessing
+/// token, not a normal token, as such, it is an internal interface. It assumes
+/// that the Flags of result have been cleared before calling this.
+bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
+LexNextToken:
+ // New token, can't need cleaning yet.
+ Result.clearFlag(Token::NeedsCleaning);
+ Result.setIdentifierInfo(nullptr);
+
+ // CurPtr - Cache BufferPtr in an automatic variable.
+ const char *CurPtr = BufferPtr;
+
+ // Small amounts of horizontal whitespace is very common between tokens.
+ if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
+ ++CurPtr;
+ while ((*CurPtr == ' ') || (*CurPtr == '\t'))
+ ++CurPtr;
+
+ // If we are keeping whitespace and other tokens, just return what we just
+ // skipped. The next lexer invocation will return the token after the
+ // whitespace.
+ if (isKeepWhitespaceMode()) {
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ // FIXME: The next token will not have LeadingSpace set.
+ return true;
+ }
+
+ BufferPtr = CurPtr;
+ Result.setFlag(Token::LeadingSpace);
+ }
+
+ unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
+
+ // Read a character, advancing over it.
+ char Char = getAndAdvanceChar(CurPtr, Result);
+ tok::TokenKind Kind;
+
+ switch (Char) {
+ case 0: // Null.
+ // Found end of file?
+ if (CurPtr-1 == BufferEnd)
+ return LexEndOfFile(Result, CurPtr-1);
+
+ // Check if we are performing code completion.
+ if (isCodeCompletionPoint(CurPtr-1)) {
+ // Return the code-completion token.
+ Result.startToken();
+ FormTokenWithChars(Result, CurPtr, tok::code_completion);
+ return true;
+ }
+
+ if (!isLexingRawMode())
+ Diag(CurPtr-1, diag::null_in_file);
+ Result.setFlag(Token::LeadingSpace);
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ // We know the lexer hasn't changed, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+
+ case 26: // DOS & CP/M EOF: "^Z".
+ // If we're in Microsoft extensions mode, treat this as end of file.
+ if (LangOpts.MicrosoftExt) {
+ if (!isLexingRawMode())
+ Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
+ return LexEndOfFile(Result, CurPtr-1);
+ }
+
+ // If Microsoft extensions are disabled, this is just random garbage.
+ Kind = tok::unknown;
+ break;
+
+ case '\r':
+ if (CurPtr[0] == '\n')
+ (void)getAndAdvanceChar(CurPtr, Result);
+ LLVM_FALLTHROUGH;
+ case '\n':
+ // If we are inside a preprocessor directive and we see the end of line,
+ // we know we are done with the directive, so return an EOD token.
+ if (ParsingPreprocessorDirective) {
+ // Done parsing the "line".
+ ParsingPreprocessorDirective = false;
+
+ // Restore comment saving mode, in case it was disabled for directive.
+ if (PP)
+ resetExtendedTokenMode();
+
+ // Since we consumed a newline, we are back at the start of a line.
+ IsAtStartOfLine = true;
+ IsAtPhysicalStartOfLine = true;
+
+ Kind = tok::eod;
+ break;
+ }
+
+ // No leading whitespace seen so far.
+ Result.clearFlag(Token::LeadingSpace);
+
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\v':
+ SkipHorizontalWhitespace:
+ Result.setFlag(Token::LeadingSpace);
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ SkipIgnoredUnits:
+ CurPtr = BufferPtr;
+
+ // If the next token is obviously a // or /* */ comment, skip it efficiently
+ // too (without going through the big switch stmt).
+ if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
+ LangOpts.LineComment &&
+ (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
+ if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+ return true; // There is a token to return.
+ goto SkipIgnoredUnits;
+ } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
+ if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+ return true; // There is a token to return.
+ goto SkipIgnoredUnits;
+ } else if (isHorizontalWhitespace(*CurPtr)) {
+ goto SkipHorizontalWhitespace;
+ }
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+
+ // C99 6.4.4.1: Integer Constants.
+ // C99 6.4.4.2: Floating Constants.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexNumericConstant(Result, CurPtr);
+
+ case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-16 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_string_literal);
+
+ // UTF-16 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf16_char_constant);
+
+ // UTF-16 raw string literal
+ if (Char == 'R' && LangOpts.CPlusPlus11 &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf16_string_literal);
+
+ if (Char == '8') {
+ char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
+
+ // UTF-8 string literal
+ if (Char2 == '"')
+ return LexStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_string_literal);
+ if (Char2 == '\'' && LangOpts.CPlusPlus17)
+ return LexCharConstant(
+ Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_char_constant);
+
+ if (Char2 == 'R' && LangOpts.CPlusPlus11) {
+ unsigned SizeTmp3;
+ char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
+ // UTF-8 raw string literal
+ if (Char3 == '"') {
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ SizeTmp3, Result),
+ tok::utf8_string_literal);
+ }
+ }
+ }
+ }
+
+ // treat u like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // UTF-32 string literal
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_string_literal);
+
+ // UTF-32 character constant
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::utf32_char_constant);
+
+ // UTF-32 raw string literal
+ if (Char == 'R' && LangOpts.CPlusPlus11 &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf32_string_literal);
+ }
+
+ // treat U like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'R': // Identifier or C++0x raw string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (LangOpts.CPlusPlus11) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ if (Char == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::string_literal);
+ }
+
+ // treat R like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
+ case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ // Wide string literal.
+ if (Char == '"')
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::wide_string_literal);
+
+ // Wide raw string literal.
+ if (LangOpts.CPlusPlus11 && Char == 'R' &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::wide_string_literal);
+
+ // Wide character constant.
+ if (Char == '\'')
+ return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::wide_char_constant);
+ // FALL THROUGH, treating L like the start of an identifier.
+ LLVM_FALLTHROUGH;
+
+ // C99 6.4.2: Identifiers.
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+ case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
+ case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
+ case 'V': case 'W': case 'X': case 'Y': case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+ case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
+ case 'v': case 'w': case 'x': case 'y': case 'z':
+ case '_':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+
+ case '$': // $ in identifiers.
+ if (LangOpts.DollarIdents) {
+ if (!isLexingRawMode())
+ Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexIdentifier(Result, CurPtr);
+ }
+
+ Kind = tok::unknown;
+ break;
+
+ // C99 6.4.4: Character Constants.
+ case '\'':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexCharConstant(Result, CurPtr, tok::char_constant);
+
+ // C99 6.4.5: String Literals.
+ case '"':
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+ return LexStringLiteral(Result, CurPtr,
+ ParsingFilename ? tok::header_name
+ : tok::string_literal);
+
+ // C99 6.4.6: Punctuators.
+ case '?':
+ Kind = tok::question;
+ break;
+ case '[':
+ Kind = tok::l_square;
+ break;
+ case ']':
+ Kind = tok::r_square;
+ break;
+ case '(':
+ Kind = tok::l_paren;
+ break;
+ case ')':
+ Kind = tok::r_paren;
+ break;
+ case '{':
+ Kind = tok::l_brace;
+ break;
+ case '}':
+ Kind = tok::r_brace;
+ break;
+ case '.':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char >= '0' && Char <= '9') {
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ } else if (LangOpts.CPlusPlus && Char == '*') {
+ Kind = tok::periodstar;
+ CurPtr += SizeTmp;
+ } else if (Char == '.' &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+ Kind = tok::ellipsis;
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else {
+ Kind = tok::period;
+ }
+ break;
+ case '&':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '&') {
+ Kind = tok::ampamp;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '=') {
+ Kind = tok::ampequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::amp;
+ }
+ break;
+ case '*':
+ if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+ Kind = tok::starequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::star;
+ }
+ break;
+ case '+':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '+') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::plusplus;
+ } else if (Char == '=') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::plusequal;
+ } else {
+ Kind = tok::plus;
+ }
+ break;
+ case '-':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '-') { // --
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::minusminus;
+ } else if (Char == '>' && LangOpts.CPlusPlus &&
+ getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->*
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ Kind = tok::arrowstar;
+ } else if (Char == '>') { // ->
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::arrow;
+ } else if (Char == '=') { // -=
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::minusequal;
+ } else {
+ Kind = tok::minus;
+ }
+ break;
+ case '~':
+ Kind = tok::tilde;
+ break;
+ case '!':
+ if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+ Kind = tok::exclaimequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::exclaim;
+ }
+ break;
+ case '/':
+ // 6.4.9: Comments
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '/') { // Line comment.
+ // Even if Line comments are disabled (e.g. in C89 mode), we generally
+ // want to lex this as a comment. There is one problem with this though,
+ // that in one particular corner case, this can change the behavior of the
+ // resultant program. For example, In "foo //**/ bar", C89 would lex
+ // this as "foo / bar" and languages with Line comments would lex it as
+ // "foo". Check to see if the character after the second slash is a '*'.
+ // If so, we will lex that as a "/" instead of the start of a comment.
+ // However, we never do this if we are just preprocessing.
+ bool TreatAsComment = LangOpts.LineComment &&
+ (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
+ if (!TreatAsComment)
+ if (!(PP && PP->isPreprocessedOutput()))
+ TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
+
+ if (TreatAsComment) {
+ if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ TokAtPhysicalStartOfLine))
+ return true; // There is a token to return.
+
+ // It is common for the tokens immediately after a // comment to be
+ // whitespace (indentation for the next line). Instead of going through
+ // the big switch, handle it efficiently now.
+ goto SkipIgnoredUnits;
+ }
+ }
+
+ if (Char == '*') { // /**/ comment.
+ if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ TokAtPhysicalStartOfLine))
+ return true; // There is a token to return.
+
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
+
+ if (Char == '=') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::slashequal;
+ } else {
+ Kind = tok::slash;
+ }
+ break;
+ case '%':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Kind = tok::percentequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (LangOpts.Digraphs && Char == '>') {
+ Kind = tok::r_brace; // '%>' -> '}'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (LangOpts.Digraphs && Char == ':') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
+ Kind = tok::hashhash; // '%:%:' -> '##'
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::ext_charize_microsoft);
+ Kind = tok::hashat;
+ } else { // '%:' -> '#'
+ // We parsed a # character. If this occurs at the start of the line,
+ // it's actually the start of a preprocessing directive. Callback to
+ // the preprocessor to handle it.
+ // TODO: -fpreprocessed mode??
+ if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
+ goto HandleDirective;
+
+ Kind = tok::hash;
+ }
+ } else {
+ Kind = tok::percent;
+ }
+ break;
+ case '<':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (ParsingFilename) {
+ return LexAngledStringLiteral(Result, CurPtr);
+ } else if (Char == '<') {
+ char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ if (After == '=') {
+ Kind = tok::lesslessequal;
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+ // If this is actually a '<<<<<<<' version control conflict marker,
+ // recognize it as such and recover nicely.
+ goto LexNextToken;
+ } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) {
+ // If this is '<<<<' and we're in a Perforce-style conflict marker,
+ // ignore it.
+ goto LexNextToken;
+ } else if (LangOpts.CUDA && After == '<') {
+ Kind = tok::lesslessless;
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::lessless;
+ }
+ } else if (Char == '=') {
+ char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ if (After == '>') {
+ if (getLangOpts().CPlusPlus2a) {
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ Kind = tok::spaceship;
+ break;
+ }
+ // Suggest adding a space between the '<=' and the '>' to avoid a
+ // change in semantics if this turns up in C++ <=17 mode.
+ if (getLangOpts().CPlusPlus && !isLexingRawMode()) {
+ Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
+ << FixItHint::CreateInsertion(
+ getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " ");
+ }
+ }
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::lessequal;
+ } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '['
+ if (LangOpts.CPlusPlus11 &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
+ // C++0x [lex.pptoken]p3:
+ // Otherwise, if the next three characters are <:: and the subsequent
+ // character is neither : nor >, the < is treated as a preprocessor
+ // token by itself and not as the first character of the alternative
+ // token <:.
+ unsigned SizeTmp3;
+ char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
+ if (After != ':' && After != '>') {
+ Kind = tok::less;
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
+ break;
+ }
+ }
+
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::l_square;
+ } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::l_brace;
+ } else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 &&
+ lexEditorPlaceholder(Result, CurPtr)) {
+ return true;
+ } else {
+ Kind = tok::less;
+ }
+ break;
+ case '>':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::greaterequal;
+ } else if (Char == '>') {
+ char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ if (After == '=') {
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ Kind = tok::greatergreaterequal;
+ } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) {
+ // If this is actually a '>>>>' conflict marker, recognize it as such
+ // and recover nicely.
+ goto LexNextToken;
+ } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+ // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
+ goto LexNextToken;
+ } else if (LangOpts.CUDA && After == '>') {
+ Kind = tok::greatergreatergreater;
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::greatergreater;
+ }
+ } else {
+ Kind = tok::greater;
+ }
+ break;
+ case '^':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::caretequal;
+ } else if (LangOpts.OpenCL && Char == '^') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::caretcaret;
+ } else {
+ Kind = tok::caret;
+ }
+ break;
+ case '|':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ Kind = tok::pipeequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '|') {
+ // If this is '|||||||' and we're in a conflict marker, ignore it.
+ if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+ goto LexNextToken;
+ Kind = tok::pipepipe;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::pipe;
+ }
+ break;
+ case ':':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (LangOpts.Digraphs && Char == '>') {
+ Kind = tok::r_square; // ':>' -> ']'
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if ((LangOpts.CPlusPlus ||
+ LangOpts.DoubleSquareBracketAttributes) &&
+ Char == ':') {
+ Kind = tok::coloncolon;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::colon;
+ }
+ break;
+ case ';':
+ Kind = tok::semi;
+ break;
+ case '=':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '=') {
+ // If this is '====' and we're in a conflict marker, ignore it.
+ if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+ goto LexNextToken;
+
+ Kind = tok::equalequal;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ Kind = tok::equal;
+ }
+ break;
+ case ',':
+ Kind = tok::comma;
+ break;
+ case '#':
+ Char = getCharAndSize(CurPtr, SizeTmp);
+ if (Char == '#') {
+ Kind = tok::hashhash;
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize
+ Kind = tok::hashat;
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::ext_charize_microsoft);
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ } else {
+ // We parsed a # character. If this occurs at the start of the line,
+ // it's actually the start of a preprocessing directive. Callback to
+ // the preprocessor to handle it.
+ // TODO: -fpreprocessed mode??
+ if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
+ goto HandleDirective;
+
+ Kind = tok::hash;
+ }
+ break;
+
+ case '@':
+ // Objective C support.
+ if (CurPtr[-1] == '@' && LangOpts.ObjC)
+ Kind = tok::at;
+ else
+ Kind = tok::unknown;
+ break;
+
+ // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
+ case '\\':
+ if (!LangOpts.AsmPreprocessor) {
+ if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+ if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
+
+ return LexUnicode(Result, CodePoint, CurPtr);
+ }
+ }
+
+ Kind = tok::unknown;
+ break;
+
+ default: {
+ if (isASCII(Char)) {
+ Kind = tok::unknown;
+ break;
+ }
+
+ llvm::UTF32 CodePoint;
+
+ // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
+ // an escaped newline.
+ --CurPtr;
+ llvm::ConversionResult Status =
+ llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
+ (const llvm::UTF8 *)BufferEnd,
+ &CodePoint,
+ llvm::strictConversion);
+ if (Status == llvm::conversionOK) {
+ if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+ if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+ return true; // KeepWhitespaceMode
+
+ // We only saw whitespace, so just try again with this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
+ return LexUnicode(Result, CodePoint, CurPtr);
+ }
+
+ if (isLexingRawMode() || ParsingPreprocessorDirective ||
+ PP->isPreprocessedOutput()) {
+ ++CurPtr;
+ Kind = tok::unknown;
+ break;
+ }
+
+ // Non-ASCII characters tend to creep into source code unintentionally.
+ // Instead of letting the parser complain about the unknown token,
+ // just diagnose the invalid UTF-8, then drop the character.
+ Diag(CurPtr, diag::err_invalid_utf8);
+
+ BufferPtr = CurPtr+1;
+ // We're pretending the character didn't exist, so just try again with
+ // this lexer.
+ // (We manually eliminate the tail call to avoid recursion.)
+ goto LexNextToken;
+ }
+ }
+
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr, Kind);
+ return true;
+
+HandleDirective:
+ // We parsed a # character and it's the start of a preprocessing directive.
+
+ FormTokenWithChars(Result, CurPtr, tok::hash);
+ PP->HandleDirective(Result);
+
+ if (PP->hadModuleLoaderFatalFailure()) {
+ // With a fatal failure in the module loader, we abort parsing.
+ assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof");
+ return true;
+ }
+
+ // We parsed the directive; lex a token with the new state.
+ return false;
+}
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
new file mode 100644
index 000000000000..2108408377fb
--- /dev/null
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -0,0 +1,1896 @@
+//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NumericLiteralParser, CharLiteralParser, and
+// StringLiteralParser interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+using namespace clang;
+
+static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
+ switch (kind) {
+ default: llvm_unreachable("Unknown token type!");
+ case tok::char_constant:
+ case tok::string_literal:
+ case tok::utf8_char_constant:
+ case tok::utf8_string_literal:
+ return Target.getCharWidth();
+ case tok::wide_char_constant:
+ case tok::wide_string_literal:
+ return Target.getWCharWidth();
+ case tok::utf16_char_constant:
+ case tok::utf16_string_literal:
+ return Target.getChar16Width();
+ case tok::utf32_char_constant:
+ case tok::utf32_string_literal:
+ return Target.getChar32Width();
+ }
+}
+
+static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
+ FullSourceLoc TokLoc,
+ const char *TokBegin,
+ const char *TokRangeBegin,
+ const char *TokRangeEnd) {
+ SourceLocation Begin =
+ Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
+ TokLoc.getManager(), Features);
+ SourceLocation End =
+ Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
+ TokLoc.getManager(), Features);
+ return CharSourceRange::getCharRange(Begin, End);
+}
+
+/// Produce a diagnostic highlighting some portion of a literal.
+///
+/// Emits the diagnostic \p DiagID, highlighting the range of characters from
+/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
+/// a substring of a spelling buffer for the token beginning at \p TokBegin.
+static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
+ const LangOptions &Features, FullSourceLoc TokLoc,
+ const char *TokBegin, const char *TokRangeBegin,
+ const char *TokRangeEnd, unsigned DiagID) {
+ SourceLocation Begin =
+ Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
+ TokLoc.getManager(), Features);
+ return Diags->Report(Begin, DiagID) <<
+ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
+}
+
+/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
+/// either a character or a string literal.
+static unsigned ProcessCharEscape(const char *ThisTokBegin,
+ const char *&ThisTokBuf,
+ const char *ThisTokEnd, bool &HadError,
+ FullSourceLoc Loc, unsigned CharWidth,
+ DiagnosticsEngine *Diags,
+ const LangOptions &Features) {
+ const char *EscapeBegin = ThisTokBuf;
+
+ // Skip the '\' char.
+ ++ThisTokBuf;
+
+ // We know that this character can't be off the end of the buffer, because
+ // that would have been \", which would not have been the end of string.
+ unsigned ResultChar = *ThisTokBuf++;
+ switch (ResultChar) {
+ // These map to themselves.
+ case '\\': case '\'': case '"': case '?': break;
+
+ // These have fixed mappings.
+ case 'a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ ResultChar = 7;
+ break;
+ case 'b':
+ ResultChar = 8;
+ break;
+ case 'e':
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_nonstandard_escape) << "e";
+ ResultChar = 27;
+ break;
+ case 'E':
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_nonstandard_escape) << "E";
+ ResultChar = 27;
+ break;
+ case 'f':
+ ResultChar = 12;
+ break;
+ case 'n':
+ ResultChar = 10;
+ break;
+ case 'r':
+ ResultChar = 13;
+ break;
+ case 't':
+ ResultChar = 9;
+ break;
+ case 'v':
+ ResultChar = 11;
+ break;
+ case 'x': { // Hex escape.
+ ResultChar = 0;
+ if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_hex_escape_no_digits) << "x";
+ HadError = true;
+ break;
+ }
+
+ // Hex escapes are a maximal series of hex digits.
+ bool Overflow = false;
+ for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+ int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
+ if (CharVal == -1) break;
+ // About to shift out a digit?
+ if (ResultChar & 0xF0000000)
+ Overflow = true;
+ ResultChar <<= 4;
+ ResultChar |= CharVal;
+ }
+
+ // See if any bits will be truncated when evaluated as a character.
+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+ Overflow = true;
+ ResultChar &= ~0U >> (32-CharWidth);
+ }
+
+ // Check for overflow.
+ if (Overflow && Diags) // Too many digits to fit in
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large) << 0;
+ break;
+ }
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': {
+ // Octal escapes.
+ --ThisTokBuf;
+ ResultChar = 0;
+
+ // Octal escapes are a series of octal digits with maximum length 3.
+ // "\0123" is a two digit sequence equal to "\012" "3".
+ unsigned NumDigits = 0;
+ do {
+ ResultChar <<= 3;
+ ResultChar |= *ThisTokBuf++ - '0';
+ ++NumDigits;
+ } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
+ ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
+
+ // Check for overflow. Reject '\777', but not L'\777'.
+ if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large) << 1;
+ ResultChar &= ~0U >> (32-CharWidth);
+ }
+ break;
+ }
+
+ // Otherwise, these are not valid escapes.
+ case '(': case '{': case '[': case '%':
+ // GCC accepts these as extensions. We warn about them as such though.
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_nonstandard_escape)
+ << std::string(1, ResultChar);
+ break;
+ default:
+ if (!Diags)
+ break;
+
+ if (isPrintable(ResultChar))
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_unknown_escape)
+ << std::string(1, ResultChar);
+ else
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_unknown_escape)
+ << "x" + llvm::utohexstr(ResultChar);
+ break;
+ }
+
+ return ResultChar;
+}
+
+static void appendCodePoint(unsigned Codepoint,
+ llvm::SmallVectorImpl<char> &Str) {
+ char ResultBuf[4];
+ char *ResultPtr = ResultBuf;
+ bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
+ (void)Res;
+ assert(Res && "Unexpected conversion failure");
+ Str.append(ResultBuf, ResultPtr);
+}
+
+void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+ for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+ if (*I != '\\') {
+ Buf.push_back(*I);
+ continue;
+ }
+
+ ++I;
+ assert(*I == 'u' || *I == 'U');
+
+ unsigned NumHexDigits;
+ if (*I == 'u')
+ NumHexDigits = 4;
+ else
+ NumHexDigits = 8;
+
+ assert(I + NumHexDigits <= E);
+
+ uint32_t CodePoint = 0;
+ for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+
+ appendCodePoint(CodePoint, Buf);
+ --I;
+ }
+}
+
+/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
+/// return the UTF32.
+static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+ const char *ThisTokEnd,
+ uint32_t &UcnVal, unsigned short &UcnLen,
+ FullSourceLoc Loc, DiagnosticsEngine *Diags,
+ const LangOptions &Features,
+ bool in_char_string_literal = false) {
+ const char *UcnBegin = ThisTokBuf;
+
+ // Skip the '\u' char's.
+ ThisTokBuf += 2;
+
+ if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
+ return false;
+ }
+ UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
+ unsigned short UcnLenSave = UcnLen;
+ for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
+ int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
+ if (CharVal == -1) break;
+ UcnVal <<= 4;
+ UcnVal |= CharVal;
+ }
+ // If we didn't consume the proper number of digits, there is a problem.
+ if (UcnLenSave) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_ucn_escape_incomplete);
+ return false;
+ }
+
+ // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
+ if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
+ UcnVal > 0x10FFFF) { // maximum legal UTF32 value
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_ucn_escape_invalid);
+ return false;
+ }
+
+ // C++11 allows UCNs that refer to control characters and basic source
+ // characters inside character and string literals
+ if (UcnVal < 0xa0 &&
+ (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
+ bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
+ if (Diags) {
+ char BasicSCSChar = UcnVal;
+ if (UcnVal >= 0x20 && UcnVal < 0x7f)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ IsError ? diag::err_ucn_escape_basic_scs :
+ diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
+ << StringRef(&BasicSCSChar, 1);
+ else
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ IsError ? diag::err_ucn_control_character :
+ diag::warn_cxx98_compat_literal_ucn_control_character);
+ }
+ if (IsError)
+ return false;
+ }
+
+ if (!Features.CPlusPlus && !Features.C99 && Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::warn_ucn_not_valid_in_c89_literal);
+
+ return true;
+}
+
+/// MeasureUCNEscape - Determine the number of bytes within the resulting string
+/// which this UCN will occupy.
+static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+ const char *ThisTokEnd, unsigned CharByteWidth,
+ const LangOptions &Features, bool &HadError) {
+ // UTF-32: 4 bytes per escape.
+ if (CharByteWidth == 4)
+ return 4;
+
+ uint32_t UcnVal = 0;
+ unsigned short UcnLen = 0;
+ FullSourceLoc Loc;
+
+ if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
+ UcnLen, Loc, nullptr, Features, true)) {
+ HadError = true;
+ return 0;
+ }
+
+ // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
+ if (CharByteWidth == 2)
+ return UcnVal <= 0xFFFF ? 2 : 4;
+
+ // UTF-8.
+ if (UcnVal < 0x80)
+ return 1;
+ if (UcnVal < 0x800)
+ return 2;
+ if (UcnVal < 0x10000)
+ return 3;
+ return 4;
+}
+
+/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
+/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
+/// StringLiteralParser. When we decide to implement UCN's for identifiers,
+/// we will likely rework our support for UCN's.
+static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+ const char *ThisTokEnd,
+ char *&ResultBuf, bool &HadError,
+ FullSourceLoc Loc, unsigned CharByteWidth,
+ DiagnosticsEngine *Diags,
+ const LangOptions &Features) {
+ typedef uint32_t UTF32;
+ UTF32 UcnVal = 0;
+ unsigned short UcnLen = 0;
+ if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
+ Loc, Diags, Features, true)) {
+ HadError = true;
+ return;
+ }
+
+ assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
+ "only character widths of 1, 2, or 4 bytes supported");
+
+ (void)UcnLen;
+ assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+
+ if (CharByteWidth == 4) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
+ *ResultPtr = UcnVal;
+ ResultBuf += 4;
+ return;
+ }
+
+ if (CharByteWidth == 2) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
+
+ if (UcnVal <= (UTF32)0xFFFF) {
+ *ResultPtr = UcnVal;
+ ResultBuf += 2;
+ return;
+ }
+
+ // Convert to UTF16.
+ UcnVal -= 0x10000;
+ *ResultPtr = 0xD800 + (UcnVal >> 10);
+ *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
+ ResultBuf += 4;
+ return;
+ }
+
+ assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
+
+ // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
+ // The conversion below was inspired by:
+ // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
+ // First, we determine how many bytes the result will require.
+ typedef uint8_t UTF8;
+
+ unsigned short bytesToWrite = 0;
+ if (UcnVal < (UTF32)0x80)
+ bytesToWrite = 1;
+ else if (UcnVal < (UTF32)0x800)
+ bytesToWrite = 2;
+ else if (UcnVal < (UTF32)0x10000)
+ bytesToWrite = 3;
+ else
+ bytesToWrite = 4;
+
+ const unsigned byteMask = 0xBF;
+ const unsigned byteMark = 0x80;
+
+ // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
+ // into the first byte, depending on how many bytes follow.
+ static const UTF8 firstByteMark[5] = {
+ 0x00, 0x00, 0xC0, 0xE0, 0xF0
+ };
+ // Finally, we write the bytes into ResultBuf.
+ ResultBuf += bytesToWrite;
+ switch (bytesToWrite) { // note: everything falls through.
+ case 4:
+ *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+ LLVM_FALLTHROUGH;
+ case 3:
+ *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+ LLVM_FALLTHROUGH;
+ case 2:
+ *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+ LLVM_FALLTHROUGH;
+ case 1:
+ *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
+ }
+ // Update the buffer.
+ ResultBuf += bytesToWrite;
+}
+
+/// integer-constant: [C99 6.4.4.1]
+/// decimal-constant integer-suffix
+/// octal-constant integer-suffix
+/// hexadecimal-constant integer-suffix
+/// binary-literal integer-suffix [GNU, C++1y]
+/// user-defined-integer-literal: [C++11 lex.ext]
+/// decimal-literal ud-suffix
+/// octal-literal ud-suffix
+/// hexadecimal-literal ud-suffix
+/// binary-literal ud-suffix [GNU, C++1y]
+/// decimal-constant:
+/// nonzero-digit
+/// decimal-constant digit
+/// octal-constant:
+/// 0
+/// octal-constant octal-digit
+/// hexadecimal-constant:
+/// hexadecimal-prefix hexadecimal-digit
+/// hexadecimal-constant hexadecimal-digit
+/// hexadecimal-prefix: one of
+/// 0x 0X
+/// binary-literal:
+/// 0b binary-digit
+/// 0B binary-digit
+/// binary-literal binary-digit
+/// integer-suffix:
+/// unsigned-suffix [long-suffix]
+/// unsigned-suffix [long-long-suffix]
+/// long-suffix [unsigned-suffix]
+/// long-long-suffix [unsigned-sufix]
+/// nonzero-digit:
+/// 1 2 3 4 5 6 7 8 9
+/// octal-digit:
+/// 0 1 2 3 4 5 6 7
+/// hexadecimal-digit:
+/// 0 1 2 3 4 5 6 7 8 9
+/// a b c d e f
+/// A B C D E F
+/// binary-digit:
+/// 0
+/// 1
+/// unsigned-suffix: one of
+/// u U
+/// long-suffix: one of
+/// l L
+/// long-long-suffix: one of
+/// ll LL
+///
+/// floating-constant: [C99 6.4.4.2]
+/// TODO: add rules...
+///
+NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
+ SourceLocation TokLoc,
+ Preprocessor &PP)
+ : PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
+
+ // This routine assumes that the range begin/end matches the regex for integer
+ // and FP constants (specifically, the 'pp-number' regex), and assumes that
+ // the byte at "*end" is both valid and not part of the regex. Because of
+ // this, it doesn't have to check for 'overscan' in various places.
+ assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
+
+ s = DigitsBegin = ThisTokBegin;
+ saw_exponent = false;
+ saw_period = false;
+ saw_ud_suffix = false;
+ saw_fixed_point_suffix = false;
+ isLong = false;
+ isUnsigned = false;
+ isLongLong = false;
+ isHalf = false;
+ isFloat = false;
+ isImaginary = false;
+ isFloat16 = false;
+ isFloat128 = false;
+ MicrosoftInteger = 0;
+ isFract = false;
+ isAccum = false;
+ hadError = false;
+
+ if (*s == '0') { // parse radix
+ ParseNumberStartingWithZero(TokLoc);
+ if (hadError)
+ return;
+ } else { // the first digit is non-zero
+ radix = 10;
+ s = SkipDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else {
+ ParseDecimalOrOctalCommon(TokLoc);
+ if (hadError)
+ return;
+ }
+ }
+
+ SuffixBegin = s;
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+
+ // Initial scan to lookahead for fixed point suffix.
+ if (PP.getLangOpts().FixedPoint) {
+ for (const char *c = s; c != ThisTokEnd; ++c) {
+ if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
+ saw_fixed_point_suffix = true;
+ break;
+ }
+ }
+ }
+
+ // Parse the suffix. At this point we can classify whether we have an FP or
+ // integer constant.
+ bool isFPConstant = isFloatingLiteral();
+
+ // Loop over all of the characters of the suffix. If we see something bad,
+ // we break out of the loop.
+ for (; s != ThisTokEnd; ++s) {
+ switch (*s) {
+ case 'R':
+ case 'r':
+ if (!PP.getLangOpts().FixedPoint) break;
+ if (isFract || isAccum) break;
+ if (!(saw_period || saw_exponent)) break;
+ isFract = true;
+ continue;
+ case 'K':
+ case 'k':
+ if (!PP.getLangOpts().FixedPoint) break;
+ if (isFract || isAccum) break;
+ if (!(saw_period || saw_exponent)) break;
+ isAccum = true;
+ continue;
+ case 'h': // FP Suffix for "half".
+ case 'H':
+ // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
+ if (!(PP.getLangOpts().Half || PP.getLangOpts().FixedPoint)) break;
+ if (isIntegerLiteral()) break; // Error for integer constant.
+ if (isHalf || isFloat || isLong) break; // HH, FH, LH invalid.
+ isHalf = true;
+ continue; // Success.
+ case 'f': // FP Suffix for "float"
+ case 'F':
+ if (!isFPConstant) break; // Error for integer constant.
+ if (isHalf || isFloat || isLong || isFloat128)
+ break; // HF, FF, LF, QF invalid.
+
+ // CUDA host and device may have different _Float16 support, therefore
+ // allows f16 literals to avoid false alarm.
+ // ToDo: more precise check for CUDA.
+ if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) &&
+ s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
+ s += 2; // success, eat up 2 characters.
+ isFloat16 = true;
+ continue;
+ }
+
+ isFloat = true;
+ continue; // Success.
+ case 'q': // FP Suffix for "__float128"
+ case 'Q':
+ if (!isFPConstant) break; // Error for integer constant.
+ if (isHalf || isFloat || isLong || isFloat128)
+ break; // HQ, FQ, LQ, QQ invalid.
+ isFloat128 = true;
+ continue; // Success.
+ case 'u':
+ case 'U':
+ if (isFPConstant) break; // Error for floating constant.
+ if (isUnsigned) break; // Cannot be repeated.
+ isUnsigned = true;
+ continue; // Success.
+ case 'l':
+ case 'L':
+ if (isLong || isLongLong) break; // Cannot be repeated.
+ if (isHalf || isFloat || isFloat128) break; // LH, LF, LQ invalid.
+
+ // Check for long long. The L's need to be adjacent and the same case.
+ if (s[1] == s[0]) {
+ assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
+ if (isFPConstant) break; // long long invalid for floats.
+ isLongLong = true;
+ ++s; // Eat both of them.
+ } else {
+ isLong = true;
+ }
+ continue; // Success.
+ case 'i':
+ case 'I':
+ if (PP.getLangOpts().MicrosoftExt) {
+ if (isLong || isLongLong || MicrosoftInteger)
+ break;
+
+ if (!isFPConstant) {
+ // Allow i8, i16, i32, and i64.
+ switch (s[1]) {
+ case '8':
+ s += 2; // i8 suffix
+ MicrosoftInteger = 8;
+ break;
+ case '1':
+ if (s[2] == '6') {
+ s += 3; // i16 suffix
+ MicrosoftInteger = 16;
+ }
+ break;
+ case '3':
+ if (s[2] == '2') {
+ s += 3; // i32 suffix
+ MicrosoftInteger = 32;
+ }
+ break;
+ case '6':
+ if (s[2] == '4') {
+ s += 3; // i64 suffix
+ MicrosoftInteger = 64;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ if (MicrosoftInteger) {
+ assert(s <= ThisTokEnd && "didn't maximally munch?");
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case 'j':
+ case 'J':
+ if (isImaginary) break; // Cannot be repeated.
+ isImaginary = true;
+ continue; // Success.
+ }
+ // If we reached here, there was an error or a ud-suffix.
+ break;
+ }
+
+ // "i", "if", and "il" are user-defined suffixes in C++1y.
+ if (s != ThisTokEnd || isImaginary) {
+ // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
+ expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
+ if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
+ if (!isImaginary) {
+ // Any suffix pieces we might have parsed are actually part of the
+ // ud-suffix.
+ isLong = false;
+ isUnsigned = false;
+ isLongLong = false;
+ isFloat = false;
+ isFloat16 = false;
+ isHalf = false;
+ isImaginary = false;
+ MicrosoftInteger = 0;
+ saw_fixed_point_suffix = false;
+ isFract = false;
+ isAccum = false;
+ }
+
+ saw_ud_suffix = true;
+ return;
+ }
+
+ if (s != ThisTokEnd) {
+ // Report an error if there are any.
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
+ diag::err_invalid_suffix_constant)
+ << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) << isFPConstant;
+ hadError = true;
+ }
+ }
+
+ if (!hadError && saw_fixed_point_suffix) {
+ assert(isFract || isAccum);
+ }
+}
+
+/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
+/// numbers. It issues an error for illegal digits, and handles floating point
+/// parsing. If it detects a floating point number, the radix is set to 10.
+void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
+ assert((radix == 8 || radix == 10) && "Unexpected radix");
+
+ // If we have a hex digit other than 'e' (which denotes a FP exponent) then
+ // the code is using an incorrect base.
+ if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
+ !isValidUDSuffix(PP.getLangOpts(), StringRef(s, ThisTokEnd - s))) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
+ diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ? 1 : 0);
+ hadError = true;
+ return;
+ }
+
+ if (*s == '.') {
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+ s++;
+ radix = 10;
+ saw_period = true;
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
+ s = SkipDigits(s); // Skip suffix.
+ }
+ if (*s == 'e' || *s == 'E') { // exponent
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+ const char *Exponent = s;
+ s++;
+ radix = 10;
+ saw_exponent = true;
+ if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (containsDigits(s, first_non_digit)) {
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
+ s = first_non_digit;
+ } else {
+ if (!hadError) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+ diag::err_exponent_has_no_digits);
+ hadError = true;
+ }
+ return;
+ }
+ }
+}
+
+/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
+/// suffixes as ud-suffixes, because the diagnostic experience is better if we
+/// treat it as an invalid suffix.
+bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
+ StringRef Suffix) {
+ if (!LangOpts.CPlusPlus11 || Suffix.empty())
+ return false;
+
+ // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
+ if (Suffix[0] == '_')
+ return true;
+
+ // In C++11, there are no library suffixes.
+ if (!LangOpts.CPlusPlus14)
+ return false;
+
+ // In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
+ // Per tweaked N3660, "il", "i", and "if" are also used in the library.
+ // In C++2a "d" and "y" are used in the library.
+ return llvm::StringSwitch<bool>(Suffix)
+ .Cases("h", "min", "s", true)
+ .Cases("ms", "us", "ns", true)
+ .Cases("il", "i", "if", true)
+ .Cases("d", "y", LangOpts.CPlusPlus2a)
+ .Default(false);
+}
+
+void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
+ const char *Pos,
+ CheckSeparatorKind IsAfterDigits) {
+ if (IsAfterDigits == CSK_AfterDigits) {
+ if (Pos == ThisTokBegin)
+ return;
+ --Pos;
+ } else if (Pos == ThisTokEnd)
+ return;
+
+ if (isDigitSeparator(*Pos)) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
+ diag::err_digit_separator_not_between_digits)
+ << IsAfterDigits;
+ hadError = true;
+ }
+}
+
+/// ParseNumberStartingWithZero - This method is called when the first character
+/// of the number is found to be a zero. This means it is either an octal
+/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
+/// a floating point number (01239.123e4). Eat the prefix, determining the
+/// radix etc.
+void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
+ assert(s[0] == '0' && "Invalid method call");
+ s++;
+
+ int c1 = s[0];
+
+ // Handle a hex number like 0x1234.
+ if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
+ s++;
+ assert(s < ThisTokEnd && "didn't maximally munch?");
+ radix = 16;
+ DigitsBegin = s;
+ s = SkipHexDigits(s);
+ bool HasSignificandDigits = containsDigits(DigitsBegin, s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (*s == '.') {
+ s++;
+ saw_period = true;
+ const char *floatDigitsBegin = s;
+ s = SkipHexDigits(s);
+ if (containsDigits(floatDigitsBegin, s))
+ HasSignificandDigits = true;
+ if (HasSignificandDigits)
+ checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
+ }
+
+ if (!HasSignificandDigits) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
+ diag::err_hex_constant_requires)
+ << PP.getLangOpts().CPlusPlus << 1;
+ hadError = true;
+ return;
+ }
+
+ // A binary exponent can appear with or with a '.'. If dotted, the
+ // binary exponent is required.
+ if (*s == 'p' || *s == 'P') {
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+ const char *Exponent = s;
+ s++;
+ saw_exponent = true;
+ if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (!containsDigits(s, first_non_digit)) {
+ if (!hadError) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+ diag::err_exponent_has_no_digits);
+ hadError = true;
+ }
+ return;
+ }
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
+ s = first_non_digit;
+
+ if (!PP.getLangOpts().HexFloats)
+ PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
+ ? diag::ext_hex_literal_invalid
+ : diag::ext_hex_constant_invalid);
+ else if (PP.getLangOpts().CPlusPlus17)
+ PP.Diag(TokLoc, diag::warn_cxx17_hex_literal);
+ } else if (saw_period) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
+ diag::err_hex_constant_requires)
+ << PP.getLangOpts().CPlusPlus << 0;
+ hadError = true;
+ }
+ return;
+ }
+
+ // Handle simple binary numbers 0b01010
+ if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
+ // 0b101010 is a C++1y / GCC extension.
+ PP.Diag(TokLoc,
+ PP.getLangOpts().CPlusPlus14
+ ? diag::warn_cxx11_compat_binary_literal
+ : PP.getLangOpts().CPlusPlus
+ ? diag::ext_binary_literal_cxx14
+ : diag::ext_binary_literal);
+ ++s;
+ assert(s < ThisTokEnd && "didn't maximally munch?");
+ radix = 2;
+ DigitsBegin = s;
+ s = SkipBinaryDigits(s);
+ if (s == ThisTokEnd) {
+ // Done.
+ } else if (isHexDigit(*s) &&
+ !isValidUDSuffix(PP.getLangOpts(),
+ StringRef(s, ThisTokEnd - s))) {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
+ diag::err_invalid_digit) << StringRef(s, 1) << 2;
+ hadError = true;
+ }
+ // Other suffixes will be diagnosed by the caller.
+ return;
+ }
+
+ // For now, the radix is set to 8. If we discover that we have a
+ // floating point constant, the radix will change to 10. Octal floating
+ // point constants are not permitted (only decimal and hexadecimal).
+ radix = 8;
+ DigitsBegin = s;
+ s = SkipOctalDigits(s);
+ if (s == ThisTokEnd)
+ return; // Done, simple octal number like 01234
+
+ // If we have some other non-octal digit that *is* a decimal digit, see if
+ // this is part of a floating point number like 094.123 or 09e1.
+ if (isDigit(*s)) {
+ const char *EndDecimal = SkipDigits(s);
+ if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
+ s = EndDecimal;
+ radix = 10;
+ }
+ }
+
+ ParseDecimalOrOctalCommon(TokLoc);
+}
+
+static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
+ switch (Radix) {
+ case 2:
+ return NumDigits <= 64;
+ case 8:
+ return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
+ case 10:
+ return NumDigits <= 19; // floor(log10(2^64))
+ case 16:
+ return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
+ default:
+ llvm_unreachable("impossible Radix");
+ }
+}
+
+/// GetIntegerValue - Convert this numeric literal value to an APInt that
+/// matches Val's input width. If there is an overflow, set Val to the low bits
+/// of the result and return true. Otherwise, return false.
+bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
+ // Fast path: Compute a conservative bound on the maximum number of
+ // bits per digit in this radix. If we can't possibly overflow a
+ // uint64 based on that bound then do the simple conversion to
+ // integer. This avoids the expensive overflow checking below, and
+ // handles the common cases that matter (small decimal integers and
+ // hex/octal values which don't overflow).
+ const unsigned NumDigits = SuffixBegin - DigitsBegin;
+ if (alwaysFitsInto64Bits(radix, NumDigits)) {
+ uint64_t N = 0;
+ for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
+ if (!isDigitSeparator(*Ptr))
+ N = N * radix + llvm::hexDigitValue(*Ptr);
+
+ // This will truncate the value to Val's input width. Simply check
+ // for overflow by comparing.
+ Val = N;
+ return Val.getZExtValue() != N;
+ }
+
+ Val = 0;
+ const char *Ptr = DigitsBegin;
+
+ llvm::APInt RadixVal(Val.getBitWidth(), radix);
+ llvm::APInt CharVal(Val.getBitWidth(), 0);
+ llvm::APInt OldVal = Val;
+
+ bool OverflowOccurred = false;
+ while (Ptr < SuffixBegin) {
+ if (isDigitSeparator(*Ptr)) {
+ ++Ptr;
+ continue;
+ }
+
+ unsigned C = llvm::hexDigitValue(*Ptr++);
+
+ // If this letter is out of bound for this radix, reject it.
+ assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+ CharVal = C;
+
+ // Add the digit to the value in the appropriate radix. If adding in digits
+ // made the value smaller, then this overflowed.
+ OldVal = Val;
+
+ // Multiply by radix, did overflow occur on the multiply?
+ Val *= RadixVal;
+ OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
+
+ // Add value, did overflow occur on the value?
+ // (a + b) ult b <=> overflow
+ Val += CharVal;
+ OverflowOccurred |= Val.ult(CharVal);
+ }
+ return OverflowOccurred;
+}
+
+llvm::APFloat::opStatus
+NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
+ using llvm::APFloat;
+
+ unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
+
+ llvm::SmallString<16> Buffer;
+ StringRef Str(ThisTokBegin, n);
+ if (Str.find('\'') != StringRef::npos) {
+ Buffer.reserve(n);
+ std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
+ &isDigitSeparator);
+ Str = Buffer;
+ }
+
+ return Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
+}
+
+static inline bool IsExponentPart(char c) {
+ return c == 'p' || c == 'P' || c == 'e' || c == 'E';
+}
+
+bool NumericLiteralParser::GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale) {
+ assert(radix == 16 || radix == 10);
+
+ // Find how many digits are needed to store the whole literal.
+ unsigned NumDigits = SuffixBegin - DigitsBegin;
+ if (saw_period) --NumDigits;
+
+ // Initial scan of the exponent if it exists
+ bool ExpOverflowOccurred = false;
+ bool NegativeExponent = false;
+ const char *ExponentBegin;
+ uint64_t Exponent = 0;
+ int64_t BaseShift = 0;
+ if (saw_exponent) {
+ const char *Ptr = DigitsBegin;
+
+ while (!IsExponentPart(*Ptr)) ++Ptr;
+ ExponentBegin = Ptr;
+ ++Ptr;
+ NegativeExponent = *Ptr == '-';
+ if (NegativeExponent) ++Ptr;
+
+ unsigned NumExpDigits = SuffixBegin - Ptr;
+ if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
+ llvm::StringRef ExpStr(Ptr, NumExpDigits);
+ llvm::APInt ExpInt(/*numBits=*/64, ExpStr, /*radix=*/10);
+ Exponent = ExpInt.getZExtValue();
+ } else {
+ ExpOverflowOccurred = true;
+ }
+
+ if (NegativeExponent) BaseShift -= Exponent;
+ else BaseShift += Exponent;
+ }
+
+ // Number of bits needed for decimal literal is
+ // ceil(NumDigits * log2(10)) Integral part
+ // + Scale Fractional part
+ // + ceil(Exponent * log2(10)) Exponent
+ // --------------------------------------------------
+ // ceil((NumDigits + Exponent) * log2(10)) + Scale
+ //
+ // But for simplicity in handling integers, we can round up log2(10) to 4,
+ // making:
+ // 4 * (NumDigits + Exponent) + Scale
+ //
+ // Number of digits needed for hexadecimal literal is
+ // 4 * NumDigits Integral part
+ // + Scale Fractional part
+ // + Exponent Exponent
+ // --------------------------------------------------
+ // (4 * NumDigits) + Scale + Exponent
+ uint64_t NumBitsNeeded;
+ if (radix == 10)
+ NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
+ else
+ NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
+
+ if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
+ ExpOverflowOccurred = true;
+ llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /*isSigned=*/false);
+
+ bool FoundDecimal = false;
+
+ int64_t FractBaseShift = 0;
+ const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
+ for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
+ if (*Ptr == '.') {
+ FoundDecimal = true;
+ continue;
+ }
+
+ // Normal reading of an integer
+ unsigned C = llvm::hexDigitValue(*Ptr);
+ assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+ Val *= radix;
+ Val += C;
+
+ if (FoundDecimal)
+ // Keep track of how much we will need to adjust this value by from the
+ // number of digits past the radix point.
+ --FractBaseShift;
+ }
+
+ // For a radix of 16, we will be multiplying by 2 instead of 16.
+ if (radix == 16) FractBaseShift *= 4;
+ BaseShift += FractBaseShift;
+
+ Val <<= Scale;
+
+ uint64_t Base = (radix == 16) ? 2 : 10;
+ if (BaseShift > 0) {
+ for (int64_t i = 0; i < BaseShift; ++i) {
+ Val *= Base;
+ }
+ } else if (BaseShift < 0) {
+ for (int64_t i = BaseShift; i < 0 && !Val.isNullValue(); ++i)
+ Val = Val.udiv(Base);
+ }
+
+ bool IntOverflowOccurred = false;
+ auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
+ if (Val.getBitWidth() > StoreVal.getBitWidth()) {
+ IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
+ StoreVal = Val.trunc(StoreVal.getBitWidth());
+ } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
+ IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
+ StoreVal = Val.zext(StoreVal.getBitWidth());
+ } else {
+ StoreVal = Val;
+ }
+
+ return IntOverflowOccurred || ExpOverflowOccurred;
+}
+
+/// \verbatim
+/// user-defined-character-literal: [C++11 lex.ext]
+/// character-literal ud-suffix
+/// ud-suffix:
+/// identifier
+/// character-literal: [C++11 lex.ccon]
+/// ' c-char-sequence '
+/// u' c-char-sequence '
+/// U' c-char-sequence '
+/// L' c-char-sequence '
+/// u8' c-char-sequence ' [C++1z lex.ccon]
+/// c-char-sequence:
+/// c-char
+/// c-char-sequence c-char
+/// c-char:
+/// any member of the source character set except the single-quote ',
+/// backslash \, or new-line character
+/// escape-sequence
+/// universal-character-name
+/// escape-sequence:
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \' \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
+/// universal-character-name: [C++11 lex.charset]
+/// \u hex-quad
+/// \U hex-quad hex-quad
+/// hex-quad:
+/// hex-digit hex-digit hex-digit hex-digit
+/// \endverbatim
+///
+CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
+ SourceLocation Loc, Preprocessor &PP,
+ tok::TokenKind kind) {
+ // At this point we know that the character matches the regex "(L|u|U)?'.*'".
+ HadError = false;
+
+ Kind = kind;
+
+ const char *TokBegin = begin;
+
+ // Skip over wide character determinant.
+ if (Kind != tok::char_constant)
+ ++begin;
+ if (Kind == tok::utf8_char_constant)
+ ++begin;
+
+ // Skip over the entry quote.
+ assert(begin[0] == '\'' && "Invalid token lexed");
+ ++begin;
+
+ // Remove an optional ud-suffix.
+ if (end[-1] != '\'') {
+ const char *UDSuffixEnd = end;
+ do {
+ --end;
+ } while (end[-1] != '\'');
+ // FIXME: Don't bother with this if !tok.hasUCN().
+ expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
+ UDSuffixOffset = end - TokBegin;
+ }
+
+ // Trim the ending quote.
+ assert(end != begin && "Invalid token lexed");
+ --end;
+
+ // FIXME: The "Value" is an uint64_t so we can handle char literals of
+ // up to 64-bits.
+ // FIXME: This extensively assumes that 'char' is 8-bits.
+ assert(PP.getTargetInfo().getCharWidth() == 8 &&
+ "Assumes char is 8 bits");
+ assert(PP.getTargetInfo().getIntWidth() <= 64 &&
+ (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
+ "Assumes sizeof(int) on target is <= 64 and a multiple of char");
+ assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
+ "Assumes sizeof(wchar) on target is <= 64");
+
+ SmallVector<uint32_t, 4> codepoint_buffer;
+ codepoint_buffer.resize(end - begin);
+ uint32_t *buffer_begin = &codepoint_buffer.front();
+ uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
+
+ // Unicode escapes representing characters that cannot be correctly
+ // represented in a single code unit are disallowed in character literals
+ // by this implementation.
+ uint32_t largest_character_for_kind;
+ if (tok::wide_char_constant == Kind) {
+ largest_character_for_kind =
+ 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
+ } else if (tok::utf8_char_constant == Kind) {
+ largest_character_for_kind = 0x7F;
+ } else if (tok::utf16_char_constant == Kind) {
+ largest_character_for_kind = 0xFFFF;
+ } else if (tok::utf32_char_constant == Kind) {
+ largest_character_for_kind = 0x10FFFF;
+ } else {
+ largest_character_for_kind = 0x7Fu;
+ }
+
+ while (begin != end) {
+ // Is this a span of non-escape characters?
+ if (begin[0] != '\\') {
+ char const *start = begin;
+ do {
+ ++begin;
+ } while (begin != end && *begin != '\\');
+
+ char const *tmp_in_start = start;
+ uint32_t *tmp_out_start = buffer_begin;
+ llvm::ConversionResult res =
+ llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
+ reinterpret_cast<llvm::UTF8 const *>(begin),
+ &buffer_begin, buffer_end, llvm::strictConversion);
+ if (res != llvm::conversionOK) {
+ // If we see bad encoding for unprefixed character literals, warn and
+ // simply copy the byte values, for compatibility with gcc and
+ // older versions of clang.
+ bool NoErrorOnBadEncoding = isAscii();
+ unsigned Msg = diag::err_bad_character_encoding;
+ if (NoErrorOnBadEncoding)
+ Msg = diag::warn_bad_character_encoding;
+ PP.Diag(Loc, Msg);
+ if (NoErrorOnBadEncoding) {
+ start = tmp_in_start;
+ buffer_begin = tmp_out_start;
+ for (; start != begin; ++start, ++buffer_begin)
+ *buffer_begin = static_cast<uint8_t>(*start);
+ } else {
+ HadError = true;
+ }
+ } else {
+ for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
+ if (*tmp_out_start > largest_character_for_kind) {
+ HadError = true;
+ PP.Diag(Loc, diag::err_character_too_large);
+ }
+ }
+ }
+
+ continue;
+ }
+ // Is this a Universal Character Name escape?
+ if (begin[1] == 'u' || begin[1] == 'U') {
+ unsigned short UcnLen = 0;
+ if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
+ FullSourceLoc(Loc, PP.getSourceManager()),
+ &PP.getDiagnostics(), PP.getLangOpts(), true)) {
+ HadError = true;
+ } else if (*buffer_begin > largest_character_for_kind) {
+ HadError = true;
+ PP.Diag(Loc, diag::err_character_too_large);
+ }
+
+ ++buffer_begin;
+ continue;
+ }
+ unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
+ uint64_t result =
+ ProcessCharEscape(TokBegin, begin, end, HadError,
+ FullSourceLoc(Loc,PP.getSourceManager()),
+ CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
+ *buffer_begin++ = result;
+ }
+
+ unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
+
+ if (NumCharsSoFar > 1) {
+ if (isWide())
+ PP.Diag(Loc, diag::warn_extraneous_char_constant);
+ else if (isAscii() && NumCharsSoFar == 4)
+ PP.Diag(Loc, diag::ext_four_char_character_literal);
+ else if (isAscii())
+ PP.Diag(Loc, diag::ext_multichar_character_literal);
+ else
+ PP.Diag(Loc, diag::err_multichar_utf_character_literal);
+ IsMultiChar = true;
+ } else {
+ IsMultiChar = false;
+ }
+
+ llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
+
+ // Narrow character literals act as though their value is concatenated
+ // in this implementation, but warn on overflow.
+ bool multi_char_too_long = false;
+ if (isAscii() && isMultiChar()) {
+ LitVal = 0;
+ for (size_t i = 0; i < NumCharsSoFar; ++i) {
+ // check for enough leading zeros to shift into
+ multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
+ LitVal <<= 8;
+ LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
+ }
+ } else if (NumCharsSoFar > 0) {
+ // otherwise just take the last character
+ LitVal = buffer_begin[-1];
+ }
+
+ if (!HadError && multi_char_too_long) {
+ PP.Diag(Loc, diag::warn_char_constant_too_large);
+ }
+
+ // Transfer the value from APInt to uint64_t
+ Value = LitVal.getZExtValue();
+
+ // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
+ // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
+ // character constants are not sign extended in the this implementation:
+ // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
+ if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
+ PP.getLangOpts().CharIsSigned)
+ Value = (signed char)Value;
+}
+
+/// \verbatim
+/// string-literal: [C++0x lex.string]
+/// encoding-prefix " [s-char-sequence] "
+/// encoding-prefix R raw-string
+/// encoding-prefix:
+/// u8
+/// u
+/// U
+/// L
+/// s-char-sequence:
+/// s-char
+/// s-char-sequence s-char
+/// s-char:
+/// any member of the source character set except the double-quote ",
+/// backslash \, or new-line character
+/// escape-sequence
+/// universal-character-name
+/// raw-string:
+/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
+/// r-char-sequence:
+/// r-char
+/// r-char-sequence r-char
+/// r-char:
+/// any member of the source character set, except a right parenthesis )
+/// followed by the initial d-char-sequence (which may be empty)
+/// followed by a double quote ".
+/// d-char-sequence:
+/// d-char
+/// d-char-sequence d-char
+/// d-char:
+/// any member of the basic source character set except:
+/// space, the left parenthesis (, the right parenthesis ),
+/// the backslash \, and the control characters representing horizontal
+/// tab, vertical tab, form feed, and newline.
+/// escape-sequence: [C++0x lex.ccon]
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \' \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
+/// universal-character-name:
+/// \u hex-quad
+/// \U hex-quad hex-quad
+/// hex-quad:
+/// hex-digit hex-digit hex-digit hex-digit
+/// \endverbatim
+///
+StringLiteralParser::
+StringLiteralParser(ArrayRef<Token> StringToks,
+ Preprocessor &PP, bool Complain)
+ : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
+ Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
+ MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+ ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+ init(StringToks);
+}
+
+void StringLiteralParser::init(ArrayRef<Token> StringToks){
+ // The literal token may have come from an invalid source location (e.g. due
+ // to a PCH error), in which case the token length will be 0.
+ if (StringToks.empty() || StringToks[0].getLength() < 2)
+ return DiagnoseLexingError(SourceLocation());
+
+ // Scan all of the string portions, remember the max individual token length,
+ // computing a bound on the concatenated string length, and see whether any
+ // piece is a wide-string. If any of the string portions is a wide-string
+ // literal, the result is a wide-string literal [C99 6.4.5p4].
+ assert(!StringToks.empty() && "expected at least one token");
+ MaxTokenLength = StringToks[0].getLength();
+ assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
+ SizeBound = StringToks[0].getLength()-2; // -2 for "".
+ Kind = StringToks[0].getKind();
+
+ hadError = false;
+
+ // Implement Translation Phase #6: concatenation of string literals
+ /// (C99 5.1.1.2p1). The common case is only one string fragment.
+ for (unsigned i = 1; i != StringToks.size(); ++i) {
+ if (StringToks[i].getLength() < 2)
+ return DiagnoseLexingError(StringToks[i].getLocation());
+
+ // The string could be shorter than this if it needs cleaning, but this is a
+ // reasonable bound, which is all we need.
+ assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
+ SizeBound += StringToks[i].getLength()-2; // -2 for "".
+
+ // Remember maximum string piece length.
+ if (StringToks[i].getLength() > MaxTokenLength)
+ MaxTokenLength = StringToks[i].getLength();
+
+ // Remember if we see any wide or utf-8/16/32 strings.
+ // Also check for illegal concatenations.
+ if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
+ if (isAscii()) {
+ Kind = StringToks[i].getKind();
+ } else {
+ if (Diags)
+ Diags->Report(StringToks[i].getLocation(),
+ diag::err_unsupported_string_concat);
+ hadError = true;
+ }
+ }
+ }
+
+ // Include space for the null terminator.
+ ++SizeBound;
+
+ // TODO: K&R warning: "traditional C rejects string constant concatenation"
+
+ // Get the width in bytes of char/wchar_t/char16_t/char32_t
+ CharByteWidth = getCharWidth(Kind, Target);
+ assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
+ CharByteWidth /= 8;
+
+ // The output buffer size needs to be large enough to hold wide characters.
+ // This is a worst-case assumption which basically corresponds to L"" "long".
+ SizeBound *= CharByteWidth;
+
+ // Size the temporary buffer to hold the result string data.
+ ResultBuf.resize(SizeBound);
+
+ // Likewise, but for each string piece.
+ SmallString<512> TokenBuf;
+ TokenBuf.resize(MaxTokenLength);
+
+ // Loop over all the strings, getting their spelling, and expanding them to
+ // wide strings as appropriate.
+ ResultPtr = &ResultBuf[0]; // Next byte to fill in.
+
+ Pascal = false;
+
+ SourceLocation UDSuffixTokLoc;
+
+ for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
+ const char *ThisTokBuf = &TokenBuf[0];
+ // Get the spelling of the token, which eliminates trigraphs, etc. We know
+ // that ThisTokBuf points to a buffer that is big enough for the whole token
+ // and 'spelled' tokens can only shrink.
+ bool StringInvalid = false;
+ unsigned ThisTokLen =
+ Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
+ &StringInvalid);
+ if (StringInvalid)
+ return DiagnoseLexingError(StringToks[i].getLocation());
+
+ const char *ThisTokBegin = ThisTokBuf;
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
+
+ // Remove an optional ud-suffix.
+ if (ThisTokEnd[-1] != '"') {
+ const char *UDSuffixEnd = ThisTokEnd;
+ do {
+ --ThisTokEnd;
+ } while (ThisTokEnd[-1] != '"');
+
+ StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
+
+ if (UDSuffixBuf.empty()) {
+ if (StringToks[i].hasUCN())
+ expandUCNs(UDSuffixBuf, UDSuffix);
+ else
+ UDSuffixBuf.assign(UDSuffix);
+ UDSuffixToken = i;
+ UDSuffixOffset = ThisTokEnd - ThisTokBuf;
+ UDSuffixTokLoc = StringToks[i].getLocation();
+ } else {
+ SmallString<32> ExpandedUDSuffix;
+ if (StringToks[i].hasUCN()) {
+ expandUCNs(ExpandedUDSuffix, UDSuffix);
+ UDSuffix = ExpandedUDSuffix;
+ }
+
+ // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
+ // result of a concatenation involving at least one user-defined-string-
+ // literal, all the participating user-defined-string-literals shall
+ // have the same ud-suffix.
+ if (UDSuffixBuf != UDSuffix) {
+ if (Diags) {
+ SourceLocation TokLoc = StringToks[i].getLocation();
+ Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+ << UDSuffixBuf << UDSuffix
+ << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+ << SourceRange(TokLoc, TokLoc);
+ }
+ hadError = true;
+ }
+ }
+ }
+
+ // Strip the end quote.
+ --ThisTokEnd;
+
+ // TODO: Input character set mapping support.
+
+ // Skip marker for wide or unicode strings.
+ if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
+ ++ThisTokBuf;
+ // Skip 8 of u8 marker for utf8 strings.
+ if (ThisTokBuf[0] == '8')
+ ++ThisTokBuf;
+ }
+
+ // Check for raw string
+ if (ThisTokBuf[0] == 'R') {
+ ThisTokBuf += 2; // skip R"
+
+ const char *Prefix = ThisTokBuf;
+ while (ThisTokBuf[0] != '(')
+ ++ThisTokBuf;
+ ++ThisTokBuf; // skip '('
+
+ // Remove same number of characters from the end
+ ThisTokEnd -= ThisTokBuf - Prefix;
+ assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
+
+ // C++14 [lex.string]p4: A source-file new-line in a raw string literal
+ // results in a new-line in the resulting execution string-literal.
+ StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
+ while (!RemainingTokenSpan.empty()) {
+ // Split the string literal on \r\n boundaries.
+ size_t CRLFPos = RemainingTokenSpan.find("\r\n");
+ StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
+ StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
+
+ // Copy everything before the \r\n sequence into the string literal.
+ if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
+ hadError = true;
+
+ // Point into the \n inside the \r\n sequence and operate on the
+ // remaining portion of the literal.
+ RemainingTokenSpan = AfterCRLF.substr(1);
+ }
+ } else {
+ if (ThisTokBuf[0] != '"') {
+ // The file may have come from PCH and then changed after loading the
+ // PCH; Fail gracefully.
+ return DiagnoseLexingError(StringToks[i].getLocation());
+ }
+ ++ThisTokBuf; // skip "
+
+ // Check if this is a pascal string
+ if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
+ ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+
+ // If the \p sequence is found in the first token, we have a pascal string
+ // Otherwise, if we already have a pascal string, ignore the first \p
+ if (i == 0) {
+ ++ThisTokBuf;
+ Pascal = true;
+ } else if (Pascal)
+ ThisTokBuf += 2;
+ }
+
+ while (ThisTokBuf != ThisTokEnd) {
+ // Is this a span of non-escape characters?
+ if (ThisTokBuf[0] != '\\') {
+ const char *InStart = ThisTokBuf;
+ do {
+ ++ThisTokBuf;
+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+ // Copy the character span over.
+ if (CopyStringFragment(StringToks[i], ThisTokBegin,
+ StringRef(InStart, ThisTokBuf - InStart)))
+ hadError = true;
+ continue;
+ }
+ // Is this a Universal Character Name escape?
+ if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
+ EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
+ ResultPtr, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM),
+ CharByteWidth, Diags, Features);
+ continue;
+ }
+ // Otherwise, this is a non-UCN escape character. Process it.
+ unsigned ResultChar =
+ ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM),
+ CharByteWidth*8, Diags, Features);
+
+ if (CharByteWidth == 4) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
+ *ResultWidePtr = ResultChar;
+ ResultPtr += 4;
+ } else if (CharByteWidth == 2) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
+ *ResultWidePtr = ResultChar & 0xFFFF;
+ ResultPtr += 2;
+ } else {
+ assert(CharByteWidth == 1 && "Unexpected char width");
+ *ResultPtr++ = ResultChar & 0xFF;
+ }
+ }
+ }
+ }
+
+ if (Pascal) {
+ if (CharByteWidth == 4) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
+ ResultWidePtr[0] = GetNumStringChars() - 1;
+ } else if (CharByteWidth == 2) {
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
+ ResultWidePtr[0] = GetNumStringChars() - 1;
+ } else {
+ assert(CharByteWidth == 1 && "Unexpected char width");
+ ResultBuf[0] = GetNumStringChars() - 1;
+ }
+
+ // Verify that pascal strings aren't too large.
+ if (GetStringLength() > 256) {
+ if (Diags)
+ Diags->Report(StringToks.front().getLocation(),
+ diag::err_pascal_string_too_long)
+ << SourceRange(StringToks.front().getLocation(),
+ StringToks.back().getLocation());
+ hadError = true;
+ return;
+ }
+ } else if (Diags) {
+ // Complain if this string literal has too many characters.
+ unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
+
+ if (GetNumStringChars() > MaxChars)
+ Diags->Report(StringToks.front().getLocation(),
+ diag::ext_string_too_long)
+ << GetNumStringChars() << MaxChars
+ << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
+ << SourceRange(StringToks.front().getLocation(),
+ StringToks.back().getLocation());
+ }
+}
+
+static const char *resyncUTF8(const char *Err, const char *End) {
+ if (Err == End)
+ return End;
+ End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
+ while (++Err != End && (*Err & 0xC0) == 0x80)
+ ;
+ return Err;
+}
+
+/// This function copies from Fragment, which is a sequence of bytes
+/// within Tok's contents (which begin at TokBegin) into ResultPtr.
+/// Performs widening for multi-byte characters.
+bool StringLiteralParser::CopyStringFragment(const Token &Tok,
+ const char *TokBegin,
+ StringRef Fragment) {
+ const llvm::UTF8 *ErrorPtrTmp;
+ if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
+ return false;
+
+ // If we see bad encoding for unprefixed string literals, warn and
+ // simply copy the byte values, for compatibility with gcc and older
+ // versions of clang.
+ bool NoErrorOnBadEncoding = isAscii();
+ if (NoErrorOnBadEncoding) {
+ memcpy(ResultPtr, Fragment.data(), Fragment.size());
+ ResultPtr += Fragment.size();
+ }
+
+ if (Diags) {
+ const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
+
+ FullSourceLoc SourceLoc(Tok.getLocation(), SM);
+ const DiagnosticBuilder &Builder =
+ Diag(Diags, Features, SourceLoc, TokBegin,
+ ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
+ NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
+ : diag::err_bad_string_encoding);
+
+ const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
+ StringRef NextFragment(NextStart, Fragment.end()-NextStart);
+
+ // Decode into a dummy buffer.
+ SmallString<512> Dummy;
+ Dummy.reserve(Fragment.size() * CharByteWidth);
+ char *Ptr = Dummy.data();
+
+ while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
+ const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
+ NextStart = resyncUTF8(ErrorPtr, Fragment.end());
+ Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
+ ErrorPtr, NextStart);
+ NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
+ }
+ }
+ return !NoErrorOnBadEncoding;
+}
+
+void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
+ hadError = true;
+ if (Diags)
+ Diags->Report(Loc, diag::err_lexing_string);
+}
+
+/// getOffsetOfStringByte - This function returns the offset of the
+/// specified byte of the string data represented by Token. This handles
+/// advancing over escape sequences in the string.
+unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
+ unsigned ByteNo) const {
+ // Get the spelling of the token.
+ SmallString<32> SpellingBuffer;
+ SpellingBuffer.resize(Tok.getLength());
+
+ bool StringInvalid = false;
+ const char *SpellingPtr = &SpellingBuffer[0];
+ unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
+ &StringInvalid);
+ if (StringInvalid)
+ return 0;
+
+ const char *SpellingStart = SpellingPtr;
+ const char *SpellingEnd = SpellingPtr+TokLen;
+
+ // Handle UTF-8 strings just like narrow strings.
+ if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
+ SpellingPtr += 2;
+
+ assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
+ SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
+
+ // For raw string literals, this is easy.
+ if (SpellingPtr[0] == 'R') {
+ assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
+ // Skip 'R"'.
+ SpellingPtr += 2;
+ while (*SpellingPtr != '(') {
+ ++SpellingPtr;
+ assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
+ }
+ // Skip '('.
+ ++SpellingPtr;
+ return SpellingPtr - SpellingStart + ByteNo;
+ }
+
+ // Skip over the leading quote
+ assert(SpellingPtr[0] == '"' && "Should be a string literal!");
+ ++SpellingPtr;
+
+ // Skip over bytes until we find the offset we're looking for.
+ while (ByteNo) {
+ assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
+
+ // Step over non-escapes simply.
+ if (*SpellingPtr != '\\') {
+ ++SpellingPtr;
+ --ByteNo;
+ continue;
+ }
+
+ // Otherwise, this is an escape character. Advance over it.
+ bool HadError = false;
+ if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
+ const char *EscapePtr = SpellingPtr;
+ unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
+ 1, Features, HadError);
+ if (Len > ByteNo) {
+ // ByteNo is somewhere within the escape sequence.
+ SpellingPtr = EscapePtr;
+ break;
+ }
+ ByteNo -= Len;
+ } else {
+ ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
+ FullSourceLoc(Tok.getLocation(), SM),
+ CharByteWidth*8, Diags, Features);
+ --ByteNo;
+ }
+ assert(!HadError && "This method isn't valid on erroneous strings");
+ }
+
+ return SpellingPtr-SpellingStart;
+}
+
+/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
+/// suffixes as ud-suffixes, because the diagnostic experience is better if we
+/// treat it as an invalid suffix.
+bool StringLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
+ StringRef Suffix) {
+ return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) ||
+ Suffix == "sv";
+}
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
new file mode 100644
index 000000000000..7ede00b4aa64
--- /dev/null
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -0,0 +1,307 @@
+//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroArgs interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroArgs.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <algorithm>
+
+using namespace clang;
+
+/// MacroArgs ctor function - This destroys the vector passed in.
+MacroArgs *MacroArgs::create(const MacroInfo *MI,
+ ArrayRef<Token> UnexpArgTokens,
+ bool VarargsElided, Preprocessor &PP) {
+ assert(MI->isFunctionLike() &&
+ "Can't have args for an object-like macro!");
+ MacroArgs **ResultEnt = nullptr;
+ unsigned ClosestMatch = ~0U;
+
+ // See if we have an entry with a big enough argument list to reuse on the
+ // free list. If so, reuse it.
+ for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
+ Entry = &(*Entry)->ArgCache) {
+ if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
+ (*Entry)->NumUnexpArgTokens < ClosestMatch) {
+ ResultEnt = Entry;
+
+ // If we have an exact match, use it.
+ if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
+ break;
+ // Otherwise, use the best fit.
+ ClosestMatch = (*Entry)->NumUnexpArgTokens;
+ }
+ }
+ MacroArgs *Result;
+ if (!ResultEnt) {
+ // Allocate memory for a MacroArgs object with the lexer tokens at the end,
+ // and construct the MacroArgs object.
+ Result = new (
+ llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
+ MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
+ } else {
+ Result = *ResultEnt;
+ // Unlink this node from the preprocessors singly linked list.
+ *ResultEnt = Result->ArgCache;
+ Result->NumUnexpArgTokens = UnexpArgTokens.size();
+ Result->VarargsElided = VarargsElided;
+ Result->NumMacroArgs = MI->getNumParams();
+ }
+
+ // Copy the actual unexpanded tokens to immediately after the result ptr.
+ if (!UnexpArgTokens.empty()) {
+ static_assert(std::is_trivial<Token>::value,
+ "assume trivial copyability if copying into the "
+ "uninitialized array (as opposed to reusing a cached "
+ "MacroArgs)");
+ std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
+ Result->getTrailingObjects<Token>());
+ }
+
+ return Result;
+}
+
+/// destroy - Destroy and deallocate the memory for this object.
+///
+void MacroArgs::destroy(Preprocessor &PP) {
+ // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
+ // would deallocate the element vectors.
+ for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
+ PreExpArgTokens[i].clear();
+
+ // Add this to the preprocessor's free list.
+ ArgCache = PP.MacroArgCache;
+ PP.MacroArgCache = this;
+}
+
+/// deallocate - This should only be called by the Preprocessor when managing
+/// its freelist.
+MacroArgs *MacroArgs::deallocate() {
+ MacroArgs *Next = ArgCache;
+
+ // Run the dtor to deallocate the vectors.
+ this->~MacroArgs();
+ // Release the memory for the object.
+ static_assert(std::is_trivially_destructible<Token>::value,
+ "assume trivially destructible and forego destructors");
+ free(this);
+
+ return Next;
+}
+
+
+/// getArgLength - Given a pointer to an expanded or unexpanded argument,
+/// return the number of tokens, not counting the EOF, that make up the
+/// argument.
+unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
+ unsigned NumArgTokens = 0;
+ for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
+ ++NumArgTokens;
+ return NumArgTokens;
+}
+
+
+/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+///
+const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
+
+ assert(Arg < getNumMacroArguments() && "Invalid arg #");
+ // The unexpanded argument tokens start immediately after the MacroArgs object
+ // in memory.
+ const Token *Start = getTrailingObjects<Token>();
+ const Token *Result = Start;
+
+ // Scan to find Arg.
+ for (; Arg; ++Result) {
+ assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+ if (Result->is(tok::eof))
+ --Arg;
+ }
+ assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+ return Result;
+}
+
+bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI,
+ Preprocessor &PP) {
+ if (!MI->isVariadic())
+ return false;
+ const int VariadicArgIndex = getNumMacroArguments() - 1;
+ return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
+}
+
+/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+/// by pre-expansion, return false. Otherwise, conservatively return true.
+bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
+ Preprocessor &PP) const {
+ // If there are no identifiers in the argument list, or if the identifiers are
+ // known to not be macros, pre-expansion won't modify it.
+ for (; ArgTok->isNot(tok::eof); ++ArgTok)
+ if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
+ if (II->hasMacroDefinition())
+ // Return true even though the macro could be a function-like macro
+ // without a following '(' token, or could be disabled, or not visible.
+ return true;
+ return false;
+}
+
+/// getPreExpArgument - Return the pre-expanded form of the specified
+/// argument.
+const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
+ Preprocessor &PP) {
+ assert(Arg < getNumMacroArguments() && "Invalid argument number!");
+
+ // If we have already computed this, return it.
+ if (PreExpArgTokens.size() < getNumMacroArguments())
+ PreExpArgTokens.resize(getNumMacroArguments());
+
+ std::vector<Token> &Result = PreExpArgTokens[Arg];
+ if (!Result.empty()) return Result;
+
+ SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
+
+ const Token *AT = getUnexpArgument(Arg);
+ unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
+
+ // Otherwise, we have to pre-expand this argument, populating Result. To do
+ // this, we set up a fake TokenLexer to lex from the unexpanded argument
+ // list. With this installed, we lex expanded tokens until we hit the EOF
+ // token at the end of the unexp list.
+ PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
+ false /*owns tokens*/, false /*is reinject*/);
+
+ // Lex all of the macro-expanded tokens into Result.
+ do {
+ Result.push_back(Token());
+ Token &Tok = Result.back();
+ PP.Lex(Tok);
+ } while (Result.back().isNot(tok::eof));
+
+ // Pop the token stream off the top of the stack. We know that the internal
+ // pointer inside of it is to the "end" of the token stream, but the stack
+ // will not otherwise be popped until the next token is lexed. The problem is
+ // that the token may be lexed sometime after the vector of tokens itself is
+ // destroyed, which would be badness.
+ if (PP.InCachingLexMode())
+ PP.ExitCachingLexMode();
+ PP.RemoveTopOfLexerStack();
+ return Result;
+}
+
+
+/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+/// tokens into the literal string token that should be produced by the C #
+/// preprocessor operator. If Charify is true, then it should be turned into
+/// a character literal for the Microsoft charize (#@) extension.
+///
+Token MacroArgs::StringifyArgument(const Token *ArgToks,
+ Preprocessor &PP, bool Charify,
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd) {
+ Token Tok;
+ Tok.startToken();
+ Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
+
+ const Token *ArgTokStart = ArgToks;
+
+ // Stringify all the tokens.
+ SmallString<128> Result;
+ Result += "\"";
+
+ bool isFirst = true;
+ for (; ArgToks->isNot(tok::eof); ++ArgToks) {
+ const Token &Tok = *ArgToks;
+ if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
+ Result += ' ';
+ isFirst = false;
+
+ // If this is a string or character constant, escape the token as specified
+ // by 6.10.3.2p2.
+ if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
+ Tok.is(tok::char_constant) || // 'x'
+ Tok.is(tok::wide_char_constant) || // L'x'.
+ Tok.is(tok::utf8_char_constant) || // u8'x'.
+ Tok.is(tok::utf16_char_constant) || // u'x'.
+ Tok.is(tok::utf32_char_constant)) { // U'x'.
+ bool Invalid = false;
+ std::string TokStr = PP.getSpelling(Tok, &Invalid);
+ if (!Invalid) {
+ std::string Str = Lexer::Stringify(TokStr);
+ Result.append(Str.begin(), Str.end());
+ }
+ } else if (Tok.is(tok::code_completion)) {
+ PP.CodeCompleteNaturalLanguage();
+ } else {
+ // Otherwise, just append the token. Do some gymnastics to get the token
+ // in place and avoid copies where possible.
+ unsigned CurStrLen = Result.size();
+ Result.resize(CurStrLen+Tok.getLength());
+ const char *BufPtr = Result.data() + CurStrLen;
+ bool Invalid = false;
+ unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
+
+ if (!Invalid) {
+ // If getSpelling returned a pointer to an already uniqued version of
+ // the string instead of filling in BufPtr, memcpy it onto our string.
+ if (ActualTokLen && BufPtr != &Result[CurStrLen])
+ memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
+
+ // If the token was dirty, the spelling may be shorter than the token.
+ if (ActualTokLen != Tok.getLength())
+ Result.resize(CurStrLen+ActualTokLen);
+ }
+ }
+ }
+
+ // If the last character of the string is a \, and if it isn't escaped, this
+ // is an invalid string literal, diagnose it as specified in C99.
+ if (Result.back() == '\\') {
+ // Count the number of consecutive \ characters. If even, then they are
+ // just escaped backslashes, otherwise it's an error.
+ unsigned FirstNonSlash = Result.size()-2;
+ // Guaranteed to find the starting " if nothing else.
+ while (Result[FirstNonSlash] == '\\')
+ --FirstNonSlash;
+ if ((Result.size()-1-FirstNonSlash) & 1) {
+ // Diagnose errors for things like: #define F(X) #X / F(\)
+ PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
+ Result.pop_back(); // remove one of the \'s.
+ }
+ }
+ Result += '"';
+
+ // If this is the charify operation and the result is not a legal character
+ // constant, diagnose it.
+ if (Charify) {
+ // First step, turn double quotes into single quotes:
+ Result[0] = '\'';
+ Result[Result.size()-1] = '\'';
+
+ // Check for bogus character.
+ bool isBad = false;
+ if (Result.size() == 3)
+ isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
+ else
+ isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
+
+ if (isBad) {
+ PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
+ Result = "' '"; // Use something arbitrary, but legal.
+ }
+ }
+
+ PP.CreateString(Result, Tok,
+ ExpansionLocStart, ExpansionLocEnd);
+ return Tok;
+}
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
new file mode 100644
index 000000000000..1ccd140364ae
--- /dev/null
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -0,0 +1,248 @@
+//===- MacroInfo.cpp - Information about #defined identifiers -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
+using namespace clang;
+
+MacroInfo::MacroInfo(SourceLocation DefLoc)
+ : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
+ IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
+ HasCommaPasting(false), IsDisabled(false), IsUsed(false),
+ IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false),
+ UsedForHeaderGuard(false) {}
+
+unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const {
+ assert(!IsDefinitionLengthCached);
+ IsDefinitionLengthCached = true;
+
+ if (ReplacementTokens.empty())
+ return (DefinitionLength = 0);
+
+ const Token &firstToken = ReplacementTokens.front();
+ const Token &lastToken = ReplacementTokens.back();
+ SourceLocation macroStart = firstToken.getLocation();
+ SourceLocation macroEnd = lastToken.getLocation();
+ assert(macroStart.isValid() && macroEnd.isValid());
+ assert((macroStart.isFileID() || firstToken.is(tok::comment)) &&
+ "Macro defined in macro?");
+ assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&
+ "Macro defined in macro?");
+ std::pair<FileID, unsigned>
+ startInfo = SM.getDecomposedExpansionLoc(macroStart);
+ std::pair<FileID, unsigned>
+ endInfo = SM.getDecomposedExpansionLoc(macroEnd);
+ assert(startInfo.first == endInfo.first &&
+ "Macro definition spanning multiple FileIDs ?");
+ assert(startInfo.second <= endInfo.second);
+ DefinitionLength = endInfo.second - startInfo.second;
+ DefinitionLength += lastToken.getLength();
+
+ return DefinitionLength;
+}
+
+/// Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace.
+///
+/// \param Syntactically if true, the macro definitions can be identical even
+/// if they use different identifiers for the function macro parameters.
+/// Otherwise the comparison is lexical and this implements the rules in
+/// C99 6.10.3.
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
+ bool Syntactically) const {
+ bool Lexically = !Syntactically;
+
+ // Check # tokens in replacement, number of args, and various flags all match.
+ if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+ getNumParams() != Other.getNumParams() ||
+ isFunctionLike() != Other.isFunctionLike() ||
+ isC99Varargs() != Other.isC99Varargs() ||
+ isGNUVarargs() != Other.isGNUVarargs())
+ return false;
+
+ if (Lexically) {
+ // Check arguments.
+ for (param_iterator I = param_begin(), OI = Other.param_begin(),
+ E = param_end();
+ I != E; ++I, ++OI)
+ if (*I != *OI) return false;
+ }
+
+ // Check all the tokens.
+ for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+ const Token &A = ReplacementTokens[i];
+ const Token &B = Other.ReplacementTokens[i];
+ if (A.getKind() != B.getKind())
+ return false;
+
+ // If this isn't the first first token, check that the whitespace and
+ // start-of-line characteristics match.
+ if (i != 0 &&
+ (A.isAtStartOfLine() != B.isAtStartOfLine() ||
+ A.hasLeadingSpace() != B.hasLeadingSpace()))
+ return false;
+
+ // If this is an identifier, it is easy.
+ if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
+ if (A.getIdentifierInfo() == B.getIdentifierInfo())
+ continue;
+ if (Lexically)
+ return false;
+ // With syntactic equivalence the parameter names can be different as long
+ // as they are used in the same place.
+ int AArgNum = getParameterNum(A.getIdentifierInfo());
+ if (AArgNum == -1)
+ return false;
+ if (AArgNum != Other.getParameterNum(B.getIdentifierInfo()))
+ return false;
+ continue;
+ }
+
+ // Otherwise, check the spelling.
+ if (PP.getSpelling(A) != PP.getSpelling(B))
+ return false;
+ }
+
+ return true;
+}
+
+LLVM_DUMP_METHOD void MacroInfo::dump() const {
+ llvm::raw_ostream &Out = llvm::errs();
+
+ // FIXME: Dump locations.
+ Out << "MacroInfo " << this;
+ if (IsBuiltinMacro) Out << " builtin";
+ if (IsDisabled) Out << " disabled";
+ if (IsUsed) Out << " used";
+ if (IsAllowRedefinitionsWithoutWarning)
+ Out << " allow_redefinitions_without_warning";
+ if (IsWarnIfUnused) Out << " warn_if_unused";
+ if (UsedForHeaderGuard) Out << " header_guard";
+
+ Out << "\n #define <macro>";
+ if (IsFunctionLike) {
+ Out << "(";
+ for (unsigned I = 0; I != NumParameters; ++I) {
+ if (I) Out << ", ";
+ Out << ParameterList[I]->getName();
+ }
+ if (IsC99Varargs || IsGNUVarargs) {
+ if (NumParameters && IsC99Varargs) Out << ", ";
+ Out << "...";
+ }
+ Out << ")";
+ }
+
+ bool First = true;
+ for (const Token &Tok : ReplacementTokens) {
+ // Leading space is semantically meaningful in a macro definition,
+ // so preserve it in the dump output.
+ if (First || Tok.hasLeadingSpace())
+ Out << " ";
+ First = false;
+
+ if (const char *Punc = tok::getPunctuatorSpelling(Tok.getKind()))
+ Out << Punc;
+ else if (Tok.isLiteral() && Tok.getLiteralData())
+ Out << StringRef(Tok.getLiteralData(), Tok.getLength());
+ else if (auto *II = Tok.getIdentifierInfo())
+ Out << II->getName();
+ else
+ Out << Tok.getName();
+ }
+}
+
+MacroDirective::DefInfo MacroDirective::getDefinition() {
+ MacroDirective *MD = this;
+ SourceLocation UndefLoc;
+ Optional<bool> isPublic;
+ for (; MD; MD = MD->getPrevious()) {
+ if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+ return DefInfo(DefMD, UndefLoc,
+ !isPublic.hasValue() || isPublic.getValue());
+
+ if (UndefMacroDirective *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
+ UndefLoc = UndefMD->getLocation();
+ continue;
+ }
+
+ VisibilityMacroDirective *VisMD = cast<VisibilityMacroDirective>(MD);
+ if (!isPublic.hasValue())
+ isPublic = VisMD->isPublic();
+ }
+
+ return DefInfo(nullptr, UndefLoc,
+ !isPublic.hasValue() || isPublic.getValue());
+}
+
+const MacroDirective::DefInfo
+MacroDirective::findDirectiveAtLoc(SourceLocation L,
+ const SourceManager &SM) const {
+ assert(L.isValid() && "SourceLocation is invalid.");
+ for (DefInfo Def = getDefinition(); Def; Def = Def.getPreviousDefinition()) {
+ if (Def.getLocation().isInvalid() || // For macros defined on the command line.
+ SM.isBeforeInTranslationUnit(Def.getLocation(), L))
+ return (!Def.isUndefined() ||
+ SM.isBeforeInTranslationUnit(L, Def.getUndefLocation()))
+ ? Def : DefInfo();
+ }
+ return DefInfo();
+}
+
+LLVM_DUMP_METHOD void MacroDirective::dump() const {
+ llvm::raw_ostream &Out = llvm::errs();
+
+ switch (getKind()) {
+ case MD_Define: Out << "DefMacroDirective"; break;
+ case MD_Undefine: Out << "UndefMacroDirective"; break;
+ case MD_Visibility: Out << "VisibilityMacroDirective"; break;
+ }
+ Out << " " << this;
+ // FIXME: Dump SourceLocation.
+ if (auto *Prev = getPrevious())
+ Out << " prev " << Prev;
+ if (IsFromPCH) Out << " from_pch";
+
+ if (isa<VisibilityMacroDirective>(this))
+ Out << (IsPublic ? " public" : " private");
+
+ if (auto *DMD = dyn_cast<DefMacroDirective>(this)) {
+ if (auto *Info = DMD->getInfo()) {
+ Out << "\n ";
+ Info->dump();
+ }
+ }
+ Out << "\n";
+}
+
+ModuleMacro *ModuleMacro::create(Preprocessor &PP, Module *OwningModule,
+ IdentifierInfo *II, MacroInfo *Macro,
+ ArrayRef<ModuleMacro *> Overrides) {
+ void *Mem = PP.getPreprocessorAllocator().Allocate(
+ sizeof(ModuleMacro) + sizeof(ModuleMacro *) * Overrides.size(),
+ alignof(ModuleMacro));
+ return new (Mem) ModuleMacro(OwningModule, II, Macro, Overrides);
+}
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
new file mode 100644
index 000000000000..db59629997ee
--- /dev/null
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -0,0 +1,3010 @@
+//===- ModuleMap.cpp - Describe the layout of modules ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ModuleMap implementation, which describes the layout
+// of a module as it relates to headers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ModuleMap.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace clang;
+
+void ModuleMapCallbacks::anchor() {}
+
+void ModuleMap::resolveLinkAsDependencies(Module *Mod) {
+ auto PendingLinkAs = PendingLinkAsModule.find(Mod->Name);
+ if (PendingLinkAs != PendingLinkAsModule.end()) {
+ for (auto &Name : PendingLinkAs->second) {
+ auto *M = findModule(Name.getKey());
+ if (M)
+ M->UseExportAsModuleLinkName = true;
+ }
+ }
+}
+
+void ModuleMap::addLinkAsDependency(Module *Mod) {
+ if (findModule(Mod->ExportAsModule))
+ Mod->UseExportAsModuleLinkName = true;
+ else
+ PendingLinkAsModule[Mod->ExportAsModule].insert(Mod->Name);
+}
+
+Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {
+ switch ((int)Role) {
+ default: llvm_unreachable("unknown header role");
+ case NormalHeader:
+ return Module::HK_Normal;
+ case PrivateHeader:
+ return Module::HK_Private;
+ case TextualHeader:
+ return Module::HK_Textual;
+ case PrivateHeader | TextualHeader:
+ return Module::HK_PrivateTextual;
+ }
+}
+
+ModuleMap::ModuleHeaderRole
+ModuleMap::headerKindToRole(Module::HeaderKind Kind) {
+ switch ((int)Kind) {
+ case Module::HK_Normal:
+ return NormalHeader;
+ case Module::HK_Private:
+ return PrivateHeader;
+ case Module::HK_Textual:
+ return TextualHeader;
+ case Module::HK_PrivateTextual:
+ return ModuleHeaderRole(PrivateHeader | TextualHeader);
+ case Module::HK_Excluded:
+ llvm_unreachable("unexpected header kind");
+ }
+ llvm_unreachable("unknown header kind");
+}
+
+Module::ExportDecl
+ModuleMap::resolveExport(Module *Mod,
+ const Module::UnresolvedExportDecl &Unresolved,
+ bool Complain) const {
+ // We may have just a wildcard.
+ if (Unresolved.Id.empty()) {
+ assert(Unresolved.Wildcard && "Invalid unresolved export");
+ return Module::ExportDecl(nullptr, true);
+ }
+
+ // Resolve the module-id.
+ Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain);
+ if (!Context)
+ return {};
+
+ return Module::ExportDecl(Context, Unresolved.Wildcard);
+}
+
+Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod,
+ bool Complain) const {
+ // Find the starting module.
+ Module *Context = lookupModuleUnqualified(Id[0].first, Mod);
+ if (!Context) {
+ if (Complain)
+ Diags.Report(Id[0].second, diag::err_mmap_missing_module_unqualified)
+ << Id[0].first << Mod->getFullModuleName();
+
+ return nullptr;
+ }
+
+ // Dig into the module path.
+ for (unsigned I = 1, N = Id.size(); I != N; ++I) {
+ Module *Sub = lookupModuleQualified(Id[I].first, Context);
+ if (!Sub) {
+ if (Complain)
+ Diags.Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+ << Id[I].first << Context->getFullModuleName()
+ << SourceRange(Id[0].second, Id[I-1].second);
+
+ return nullptr;
+ }
+
+ Context = Sub;
+ }
+
+ return Context;
+}
+
+/// Append to \p Paths the set of paths needed to get to the
+/// subframework in which the given module lives.
+static void appendSubframeworkPaths(Module *Mod,
+ SmallVectorImpl<char> &Path) {
+ // Collect the framework names from the given module to the top-level module.
+ SmallVector<StringRef, 2> Paths;
+ for (; Mod; Mod = Mod->Parent) {
+ if (Mod->IsFramework)
+ Paths.push_back(Mod->Name);
+ }
+
+ if (Paths.empty())
+ return;
+
+ // Add Frameworks/Name.framework for each subframework.
+ for (unsigned I = Paths.size() - 1; I != 0; --I)
+ llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework");
+}
+
+const FileEntry *ModuleMap::findHeader(
+ Module *M, const Module::UnresolvedHeaderDirective &Header,
+ SmallVectorImpl<char> &RelativePathName, bool &NeedsFramework) {
+ // Search for the header file within the module's home directory.
+ auto *Directory = M->Directory;
+ SmallString<128> FullPathName(Directory->getName());
+
+ auto GetFile = [&](StringRef Filename) -> const FileEntry * {
+ auto File = SourceMgr.getFileManager().getFile(Filename);
+ if (!File ||
+ (Header.Size && (*File)->getSize() != *Header.Size) ||
+ (Header.ModTime && (*File)->getModificationTime() != *Header.ModTime))
+ return nullptr;
+ return *File;
+ };
+
+ auto GetFrameworkFile = [&]() -> const FileEntry * {
+ unsigned FullPathLength = FullPathName.size();
+ appendSubframeworkPaths(M, RelativePathName);
+ unsigned RelativePathLength = RelativePathName.size();
+
+ // Check whether this file is in the public headers.
+ llvm::sys::path::append(RelativePathName, "Headers", Header.FileName);
+ llvm::sys::path::append(FullPathName, RelativePathName);
+ if (auto *File = GetFile(FullPathName))
+ return File;
+
+ // Check whether this file is in the private headers.
+ // Ideally, private modules in the form 'FrameworkName.Private' should
+ // be defined as 'module FrameworkName.Private', and not as
+ // 'framework module FrameworkName.Private', since a 'Private.Framework'
+ // does not usually exist. However, since both are currently widely used
+ // for private modules, make sure we find the right path in both cases.
+ if (M->IsFramework && M->Name == "Private")
+ RelativePathName.clear();
+ else
+ RelativePathName.resize(RelativePathLength);
+ FullPathName.resize(FullPathLength);
+ llvm::sys::path::append(RelativePathName, "PrivateHeaders",
+ Header.FileName);
+ llvm::sys::path::append(FullPathName, RelativePathName);
+ return GetFile(FullPathName);
+ };
+
+ if (llvm::sys::path::is_absolute(Header.FileName)) {
+ RelativePathName.clear();
+ RelativePathName.append(Header.FileName.begin(), Header.FileName.end());
+ return GetFile(Header.FileName);
+ }
+
+ if (M->isPartOfFramework())
+ return GetFrameworkFile();
+
+ // Lookup for normal headers.
+ llvm::sys::path::append(RelativePathName, Header.FileName);
+ llvm::sys::path::append(FullPathName, RelativePathName);
+ auto *NormalHdrFile = GetFile(FullPathName);
+
+ if (M && !NormalHdrFile && Directory->getName().endswith(".framework")) {
+ // The lack of 'framework' keyword in a module declaration it's a simple
+ // mistake we can diagnose when the header exists within the proper
+ // framework style path.
+ FullPathName.assign(Directory->getName());
+ RelativePathName.clear();
+ if (GetFrameworkFile()) {
+ Diags.Report(Header.FileNameLoc,
+ diag::warn_mmap_incomplete_framework_module_declaration)
+ << Header.FileName << M->getFullModuleName();
+ NeedsFramework = true;
+ }
+ return nullptr;
+ }
+
+ return NormalHdrFile;
+}
+
+void ModuleMap::resolveHeader(Module *Mod,
+ const Module::UnresolvedHeaderDirective &Header,
+ bool &NeedsFramework) {
+ SmallString<128> RelativePathName;
+ if (const FileEntry *File =
+ findHeader(Mod, Header, RelativePathName, NeedsFramework)) {
+ if (Header.IsUmbrella) {
+ const DirectoryEntry *UmbrellaDir = File->getDir();
+ if (Module *UmbrellaMod = UmbrellaDirs[UmbrellaDir])
+ Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
+ << UmbrellaMod->getFullModuleName();
+ else
+ // Record this umbrella header.
+ setUmbrellaHeader(Mod, File, RelativePathName.str());
+ } else {
+ Module::Header H = {RelativePathName.str(), File};
+ if (Header.Kind == Module::HK_Excluded)
+ excludeHeader(Mod, H);
+ else
+ addHeader(Mod, H, headerKindToRole(Header.Kind));
+ }
+ } else if (Header.HasBuiltinHeader && !Header.Size && !Header.ModTime) {
+ // There's a builtin header but no corresponding on-disk header. Assume
+ // this was supposed to modularize the builtin header alone.
+ } else if (Header.Kind == Module::HK_Excluded) {
+ // Ignore missing excluded header files. They're optional anyway.
+ } else {
+ // If we find a module that has a missing header, we mark this module as
+ // unavailable and store the header directive for displaying diagnostics.
+ Mod->MissingHeaders.push_back(Header);
+ // A missing header with stat information doesn't make the module
+ // unavailable; this keeps our behavior consistent as headers are lazily
+ // resolved. (Such a module still can't be built though, except from
+ // preprocessed source.)
+ if (!Header.Size && !Header.ModTime)
+ Mod->markUnavailable();
+ }
+}
+
+bool ModuleMap::resolveAsBuiltinHeader(
+ Module *Mod, const Module::UnresolvedHeaderDirective &Header) {
+ if (Header.Kind == Module::HK_Excluded ||
+ llvm::sys::path::is_absolute(Header.FileName) ||
+ Mod->isPartOfFramework() || !Mod->IsSystem || Header.IsUmbrella ||
+ !BuiltinIncludeDir || BuiltinIncludeDir == Mod->Directory ||
+ !isBuiltinHeader(Header.FileName))
+ return false;
+
+ // This is a system module with a top-level header. This header
+ // may have a counterpart (or replacement) in the set of headers
+ // supplied by Clang. Find that builtin header.
+ SmallString<128> Path;
+ llvm::sys::path::append(Path, BuiltinIncludeDir->getName(), Header.FileName);
+ auto File = SourceMgr.getFileManager().getFile(Path);
+ if (!File)
+ return false;
+
+ auto Role = headerKindToRole(Header.Kind);
+ Module::Header H = {Path.str(), *File};
+ addHeader(Mod, H, Role);
+ return true;
+}
+
+ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+ const LangOptions &LangOpts, const TargetInfo *Target,
+ HeaderSearch &HeaderInfo)
+ : SourceMgr(SourceMgr), Diags(Diags), LangOpts(LangOpts), Target(Target),
+ HeaderInfo(HeaderInfo) {
+ MMapLangOpts.LineComment = true;
+}
+
+ModuleMap::~ModuleMap() {
+ for (auto &M : Modules)
+ delete M.getValue();
+ for (auto *M : ShadowModules)
+ delete M;
+}
+
+void ModuleMap::setTarget(const TargetInfo &Target) {
+ assert((!this->Target || this->Target == &Target) &&
+ "Improper target override");
+ this->Target = &Target;
+}
+
+/// "Sanitize" a filename so that it can be used as an identifier.
+static StringRef sanitizeFilenameAsIdentifier(StringRef Name,
+ SmallVectorImpl<char> &Buffer) {
+ if (Name.empty())
+ return Name;
+
+ if (!isValidIdentifier(Name)) {
+ // If we don't already have something with the form of an identifier,
+ // create a buffer with the sanitized name.
+ Buffer.clear();
+ if (isDigit(Name[0]))
+ Buffer.push_back('_');
+ Buffer.reserve(Buffer.size() + Name.size());
+ for (unsigned I = 0, N = Name.size(); I != N; ++I) {
+ if (isIdentifierBody(Name[I]))
+ Buffer.push_back(Name[I]);
+ else
+ Buffer.push_back('_');
+ }
+
+ Name = StringRef(Buffer.data(), Buffer.size());
+ }
+
+ while (llvm::StringSwitch<bool>(Name)
+#define KEYWORD(Keyword,Conditions) .Case(#Keyword, true)
+#define ALIAS(Keyword, AliasOf, Conditions) .Case(Keyword, true)
+#include "clang/Basic/TokenKinds.def"
+ .Default(false)) {
+ if (Name.data() != Buffer.data())
+ Buffer.append(Name.begin(), Name.end());
+ Buffer.push_back('_');
+ Name = StringRef(Buffer.data(), Buffer.size());
+ }
+
+ return Name;
+}
+
+/// Determine whether the given file name is the name of a builtin
+/// header, supplied by Clang to replace, override, or augment existing system
+/// headers.
+bool ModuleMap::isBuiltinHeader(StringRef FileName) {
+ return llvm::StringSwitch<bool>(FileName)
+ .Case("float.h", true)
+ .Case("iso646.h", true)
+ .Case("limits.h", true)
+ .Case("stdalign.h", true)
+ .Case("stdarg.h", true)
+ .Case("stdatomic.h", true)
+ .Case("stdbool.h", true)
+ .Case("stddef.h", true)
+ .Case("stdint.h", true)
+ .Case("tgmath.h", true)
+ .Case("unwind.h", true)
+ .Default(false);
+}
+
+ModuleMap::HeadersMap::iterator
+ModuleMap::findKnownHeader(const FileEntry *File) {
+ resolveHeaderDirectives(File);
+ HeadersMap::iterator Known = Headers.find(File);
+ if (HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
+ Known == Headers.end() && File->getDir() == BuiltinIncludeDir &&
+ ModuleMap::isBuiltinHeader(llvm::sys::path::filename(File->getName()))) {
+ HeaderInfo.loadTopLevelSystemModules();
+ return Headers.find(File);
+ }
+ return Known;
+}
+
+ModuleMap::KnownHeader
+ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File,
+ SmallVectorImpl<const DirectoryEntry *> &IntermediateDirs) {
+ if (UmbrellaDirs.empty())
+ return {};
+
+ const DirectoryEntry *Dir = File->getDir();
+ assert(Dir && "file in no directory");
+
+ // Note: as an egregious but useful hack we use the real path here, because
+ // frameworks moving from top-level frameworks to embedded frameworks tend
+ // to be symlinked from the top-level location to the embedded location,
+ // and we need to resolve lookups as if we had found the embedded location.
+ StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir);
+
+ // Keep walking up the directory hierarchy, looking for a directory with
+ // an umbrella header.
+ do {
+ auto KnownDir = UmbrellaDirs.find(Dir);
+ if (KnownDir != UmbrellaDirs.end())
+ return KnownHeader(KnownDir->second, NormalHeader);
+
+ IntermediateDirs.push_back(Dir);
+
+ // Retrieve our parent path.
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.empty())
+ break;
+
+ // Resolve the parent path to a directory entry.
+ if (auto DirEntry = SourceMgr.getFileManager().getDirectory(DirName))
+ Dir = *DirEntry;
+ else
+ Dir = nullptr;
+ } while (Dir);
+ return {};
+}
+
+static bool violatesPrivateInclude(Module *RequestingModule,
+ const FileEntry *IncFileEnt,
+ ModuleMap::KnownHeader Header) {
+#ifndef NDEBUG
+ if (Header.getRole() & ModuleMap::PrivateHeader) {
+ // Check for consistency between the module header role
+ // as obtained from the lookup and as obtained from the module.
+ // This check is not cheap, so enable it only for debugging.
+ bool IsPrivate = false;
+ SmallVectorImpl<Module::Header> *HeaderList[] = {
+ &Header.getModule()->Headers[Module::HK_Private],
+ &Header.getModule()->Headers[Module::HK_PrivateTextual]};
+ for (auto *Hs : HeaderList)
+ IsPrivate |=
+ std::find_if(Hs->begin(), Hs->end(), [&](const Module::Header &H) {
+ return H.Entry == IncFileEnt;
+ }) != Hs->end();
+ assert(IsPrivate && "inconsistent headers and roles");
+ }
+#endif
+ return !Header.isAccessibleFrom(RequestingModule);
+}
+
+static Module *getTopLevelOrNull(Module *M) {
+ return M ? M->getTopLevelModule() : nullptr;
+}
+
+void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
+ bool RequestingModuleIsModuleInterface,
+ SourceLocation FilenameLoc,
+ StringRef Filename,
+ const FileEntry *File) {
+ // No errors for indirect modules. This may be a bit of a problem for modules
+ // with no source files.
+ if (getTopLevelOrNull(RequestingModule) != getTopLevelOrNull(SourceModule))
+ return;
+
+ if (RequestingModule) {
+ resolveUses(RequestingModule, /*Complain=*/false);
+ resolveHeaderDirectives(RequestingModule);
+ }
+
+ bool Excluded = false;
+ Module *Private = nullptr;
+ Module *NotUsed = nullptr;
+
+ HeadersMap::iterator Known = findKnownHeader(File);
+ if (Known != Headers.end()) {
+ for (const KnownHeader &Header : Known->second) {
+ // Remember private headers for later printing of a diagnostic.
+ if (violatesPrivateInclude(RequestingModule, File, Header)) {
+ Private = Header.getModule();
+ continue;
+ }
+
+ // If uses need to be specified explicitly, we are only allowed to return
+ // modules that are explicitly used by the requesting module.
+ if (RequestingModule && LangOpts.ModulesDeclUse &&
+ !RequestingModule->directlyUses(Header.getModule())) {
+ NotUsed = Header.getModule();
+ continue;
+ }
+
+ // We have found a module that we can happily use.
+ return;
+ }
+
+ Excluded = true;
+ }
+
+ // We have found a header, but it is private.
+ if (Private) {
+ Diags.Report(FilenameLoc, diag::warn_use_of_private_header_outside_module)
+ << Filename;
+ return;
+ }
+
+ // We have found a module, but we don't use it.
+ if (NotUsed) {
+ Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
+ << RequestingModule->getTopLevelModule()->Name << Filename;
+ return;
+ }
+
+ if (Excluded || isHeaderInUmbrellaDirs(File))
+ return;
+
+ // At this point, only non-modular includes remain.
+
+ if (RequestingModule && LangOpts.ModulesStrictDeclUse) {
+ Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
+ << RequestingModule->getTopLevelModule()->Name << Filename;
+ } else if (RequestingModule && RequestingModuleIsModuleInterface &&
+ LangOpts.isCompilingModule()) {
+ // Do not diagnose when we are not compiling a module.
+ diag::kind DiagID = RequestingModule->getTopLevelModule()->IsFramework ?
+ diag::warn_non_modular_include_in_framework_module :
+ diag::warn_non_modular_include_in_module;
+ Diags.Report(FilenameLoc, DiagID) << RequestingModule->getFullModuleName()
+ << File->getName();
+ }
+}
+
+static bool isBetterKnownHeader(const ModuleMap::KnownHeader &New,
+ const ModuleMap::KnownHeader &Old) {
+ // Prefer available modules.
+ if (New.getModule()->isAvailable() && !Old.getModule()->isAvailable())
+ return true;
+
+ // Prefer a public header over a private header.
+ if ((New.getRole() & ModuleMap::PrivateHeader) !=
+ (Old.getRole() & ModuleMap::PrivateHeader))
+ return !(New.getRole() & ModuleMap::PrivateHeader);
+
+ // Prefer a non-textual header over a textual header.
+ if ((New.getRole() & ModuleMap::TextualHeader) !=
+ (Old.getRole() & ModuleMap::TextualHeader))
+ return !(New.getRole() & ModuleMap::TextualHeader);
+
+ // Don't have a reason to choose between these. Just keep the first one.
+ return false;
+}
+
+ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File,
+ bool AllowTextual) {
+ auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {
+ if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader)
+ return {};
+ return R;
+ };
+
+ HeadersMap::iterator Known = findKnownHeader(File);
+ if (Known != Headers.end()) {
+ ModuleMap::KnownHeader Result;
+ // Iterate over all modules that 'File' is part of to find the best fit.
+ for (KnownHeader &H : Known->second) {
+ // Prefer a header from the source module over all others.
+ if (H.getModule()->getTopLevelModule() == SourceModule)
+ return MakeResult(H);
+ if (!Result || isBetterKnownHeader(H, Result))
+ Result = H;
+ }
+ return MakeResult(Result);
+ }
+
+ return MakeResult(findOrCreateModuleForHeaderInUmbrellaDir(File));
+}
+
+ModuleMap::KnownHeader
+ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) {
+ assert(!Headers.count(File) && "already have a module for this header");
+
+ SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+ KnownHeader H = findHeaderInUmbrellaDirs(File, SkippedDirs);
+ if (H) {
+ Module *Result = H.getModule();
+
+ // Search up the module stack until we find a module with an umbrella
+ // directory.
+ Module *UmbrellaModule = Result;
+ while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent)
+ UmbrellaModule = UmbrellaModule->Parent;
+
+ if (UmbrellaModule->InferSubmodules) {
+ const FileEntry *UmbrellaModuleMap =
+ getModuleMapFileForUniquing(UmbrellaModule);
+
+ // Infer submodules for each of the directories we found between
+ // the directory of the umbrella header and the directory where
+ // the actual header is located.
+ bool Explicit = UmbrellaModule->InferExplicitSubmodules;
+
+ for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+ // Find or create the module that corresponds to this directory name.
+ SmallString<32> NameBuf;
+ StringRef Name = sanitizeFilenameAsIdentifier(
+ llvm::sys::path::stem(SkippedDirs[I-1]->getName()), NameBuf);
+ Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
+ Explicit).first;
+ InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
+ Result->IsInferred = true;
+
+ // Associate the module and the directory.
+ UmbrellaDirs[SkippedDirs[I-1]] = Result;
+
+ // If inferred submodules export everything they import, add a
+ // wildcard to the set of exports.
+ if (UmbrellaModule->InferExportWildcard && Result->Exports.empty())
+ Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+ }
+
+ // Infer a submodule with the same name as this header file.
+ SmallString<32> NameBuf;
+ StringRef Name = sanitizeFilenameAsIdentifier(
+ llvm::sys::path::stem(File->getName()), NameBuf);
+ Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
+ Explicit).first;
+ InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
+ Result->IsInferred = true;
+ Result->addTopHeader(File);
+
+ // If inferred submodules export everything they import, add a
+ // wildcard to the set of exports.
+ if (UmbrellaModule->InferExportWildcard && Result->Exports.empty())
+ Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+ } else {
+ // Record each of the directories we stepped through as being part of
+ // the module we found, since the umbrella header covers them all.
+ for (unsigned I = 0, N = SkippedDirs.size(); I != N; ++I)
+ UmbrellaDirs[SkippedDirs[I]] = Result;
+ }
+
+ KnownHeader Header(Result, NormalHeader);
+ Headers[File].push_back(Header);
+ return Header;
+ }
+
+ return {};
+}
+
+ArrayRef<ModuleMap::KnownHeader>
+ModuleMap::findAllModulesForHeader(const FileEntry *File) const {
+ resolveHeaderDirectives(File);
+ auto It = Headers.find(File);
+ if (It == Headers.end())
+ return None;
+ return It->second;
+}
+
+bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const {
+ return isHeaderUnavailableInModule(Header, nullptr);
+}
+
+bool
+ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header,
+ const Module *RequestingModule) const {
+ resolveHeaderDirectives(Header);
+ HeadersMap::const_iterator Known = Headers.find(Header);
+ if (Known != Headers.end()) {
+ for (SmallVectorImpl<KnownHeader>::const_iterator
+ I = Known->second.begin(),
+ E = Known->second.end();
+ I != E; ++I) {
+
+ if (I->isAvailable() &&
+ (!RequestingModule ||
+ I->getModule()->isSubModuleOf(RequestingModule))) {
+ // When no requesting module is available, the caller is looking if a
+ // header is part a module by only looking into the module map. This is
+ // done by warn_uncovered_module_header checks; don't consider textual
+ // headers part of it in this mode, otherwise we get misleading warnings
+ // that a umbrella header is not including a textual header.
+ if (!RequestingModule && I->getRole() == ModuleMap::TextualHeader)
+ continue;
+ return false;
+ }
+ }
+ return true;
+ }
+
+ const DirectoryEntry *Dir = Header->getDir();
+ SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+ StringRef DirName = Dir->getName();
+
+ auto IsUnavailable = [&](const Module *M) {
+ return !M->isAvailable() && (!RequestingModule ||
+ M->isSubModuleOf(RequestingModule));
+ };
+
+ // Keep walking up the directory hierarchy, looking for a directory with
+ // an umbrella header.
+ do {
+ llvm::DenseMap<const DirectoryEntry *, Module *>::const_iterator KnownDir
+ = UmbrellaDirs.find(Dir);
+ if (KnownDir != UmbrellaDirs.end()) {
+ Module *Found = KnownDir->second;
+ if (IsUnavailable(Found))
+ return true;
+
+ // Search up the module stack until we find a module with an umbrella
+ // directory.
+ Module *UmbrellaModule = Found;
+ while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent)
+ UmbrellaModule = UmbrellaModule->Parent;
+
+ if (UmbrellaModule->InferSubmodules) {
+ for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+ // Find or create the module that corresponds to this directory name.
+ SmallString<32> NameBuf;
+ StringRef Name = sanitizeFilenameAsIdentifier(
+ llvm::sys::path::stem(SkippedDirs[I-1]->getName()),
+ NameBuf);
+ Found = lookupModuleQualified(Name, Found);
+ if (!Found)
+ return false;
+ if (IsUnavailable(Found))
+ return true;
+ }
+
+ // Infer a submodule with the same name as this header file.
+ SmallString<32> NameBuf;
+ StringRef Name = sanitizeFilenameAsIdentifier(
+ llvm::sys::path::stem(Header->getName()),
+ NameBuf);
+ Found = lookupModuleQualified(Name, Found);
+ if (!Found)
+ return false;
+ }
+
+ return IsUnavailable(Found);
+ }
+
+ SkippedDirs.push_back(Dir);
+
+ // Retrieve our parent path.
+ DirName = llvm::sys::path::parent_path(DirName);
+ if (DirName.empty())
+ break;
+
+ // Resolve the parent path to a directory entry.
+ if (auto DirEntry = SourceMgr.getFileManager().getDirectory(DirName))
+ Dir = *DirEntry;
+ else
+ Dir = nullptr;
+ } while (Dir);
+
+ return false;
+}
+
+Module *ModuleMap::findModule(StringRef Name) const {
+ llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name);
+ if (Known != Modules.end())
+ return Known->getValue();
+
+ return nullptr;
+}
+
+Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
+ Module *Context) const {
+ for(; Context; Context = Context->Parent) {
+ if (Module *Sub = lookupModuleQualified(Name, Context))
+ return Sub;
+ }
+
+ return findModule(Name);
+}
+
+Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
+ if (!Context)
+ return findModule(Name);
+
+ return Context->findSubmodule(Name);
+}
+
+std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
+ Module *Parent,
+ bool IsFramework,
+ bool IsExplicit) {
+ // Try to find an existing module with this name.
+ if (Module *Sub = lookupModuleQualified(Name, Parent))
+ return std::make_pair(Sub, false);
+
+ // Create a new module with this name.
+ Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework,
+ IsExplicit, NumCreatedModules++);
+ if (!Parent) {
+ if (LangOpts.CurrentModule == Name)
+ SourceModule = Result;
+ Modules[Name] = Result;
+ ModuleScopeIDs[Result] = CurrentModuleScopeID;
+ }
+ return std::make_pair(Result, true);
+}
+
+Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc) {
+ PendingSubmodules.emplace_back(
+ new Module("<global>", Loc, nullptr, /*IsFramework*/ false,
+ /*IsExplicit*/ true, NumCreatedModules++));
+ PendingSubmodules.back()->Kind = Module::GlobalModuleFragment;
+ return PendingSubmodules.back().get();
+}
+
+Module *
+ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent,
+ SourceLocation Loc) {
+ auto *Result =
+ new Module("<private>", Loc, Parent, /*IsFramework*/ false,
+ /*IsExplicit*/ true, NumCreatedModules++);
+ Result->Kind = Module::PrivateModuleFragment;
+ return Result;
+}
+
+Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc,
+ StringRef Name,
+ Module *GlobalModule) {
+ assert(LangOpts.CurrentModule == Name && "module name mismatch");
+ assert(!Modules[Name] && "redefining existing module");
+
+ auto *Result =
+ new Module(Name, Loc, nullptr, /*IsFramework*/ false,
+ /*IsExplicit*/ false, NumCreatedModules++);
+ Result->Kind = Module::ModuleInterfaceUnit;
+ Modules[Name] = SourceModule = Result;
+
+ // Reparent the current global module fragment as a submodule of this module.
+ for (auto &Submodule : PendingSubmodules) {
+ Submodule->setParent(Result);
+ Submodule.release(); // now owned by parent
+ }
+ PendingSubmodules.clear();
+
+ // Mark the main source file as being within the newly-created module so that
+ // declarations and macros are properly visibility-restricted to it.
+ auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
+ assert(MainFile && "no input file for module interface");
+ Headers[MainFile].push_back(KnownHeader(Result, PrivateHeader));
+
+ return Result;
+}
+
+Module *ModuleMap::createHeaderModule(StringRef Name,
+ ArrayRef<Module::Header> Headers) {
+ assert(LangOpts.CurrentModule == Name && "module name mismatch");
+ assert(!Modules[Name] && "redefining existing module");
+
+ auto *Result =
+ new Module(Name, SourceLocation(), nullptr, /*IsFramework*/ false,
+ /*IsExplicit*/ false, NumCreatedModules++);
+ Result->Kind = Module::ModuleInterfaceUnit;
+ Modules[Name] = SourceModule = Result;
+
+ for (const Module::Header &H : Headers) {
+ auto *M = new Module(H.NameAsWritten, SourceLocation(), Result,
+ /*IsFramework*/ false,
+ /*IsExplicit*/ true, NumCreatedModules++);
+ // Header modules are implicitly 'export *'.
+ M->Exports.push_back(Module::ExportDecl(nullptr, true));
+ addHeader(M, H, NormalHeader);
+ }
+
+ return Result;
+}
+
+/// For a framework module, infer the framework against which we
+/// should link.
+static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir,
+ FileManager &FileMgr) {
+ assert(Mod->IsFramework && "Can only infer linking for framework modules");
+ assert(!Mod->isSubFramework() &&
+ "Can only infer linking for top-level frameworks");
+
+ SmallString<128> LibName;
+ LibName += FrameworkDir->getName();
+ llvm::sys::path::append(LibName, Mod->Name);
+
+ // The library name of a framework has more than one possible extension since
+ // the introduction of the text-based dynamic library format. We need to check
+ // for both before we give up.
+ for (const char *extension : {"", ".tbd"}) {
+ llvm::sys::path::replace_extension(LibName, extension);
+ if (FileMgr.getFile(LibName)) {
+ Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name,
+ /*IsFramework=*/true));
+ return;
+ }
+ }
+}
+
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
+ bool IsSystem, Module *Parent) {
+ Attributes Attrs;
+ Attrs.IsSystem = IsSystem;
+ return inferFrameworkModule(FrameworkDir, Attrs, Parent);
+}
+
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
+ Attributes Attrs, Module *Parent) {
+ // Note: as an egregious but useful hack we use the real path here, because
+ // we might be looking at an embedded framework that symlinks out to a
+ // top-level framework, and we need to infer as if we were naming the
+ // top-level framework.
+ StringRef FrameworkDirName =
+ SourceMgr.getFileManager().getCanonicalName(FrameworkDir);
+
+ // In case this is a case-insensitive filesystem, use the canonical
+ // directory name as the ModuleName, since modules are case-sensitive.
+ // FIXME: we should be able to give a fix-it hint for the correct spelling.
+ SmallString<32> ModuleNameStorage;
+ StringRef ModuleName = sanitizeFilenameAsIdentifier(
+ llvm::sys::path::stem(FrameworkDirName), ModuleNameStorage);
+
+ // Check whether we've already found this module.
+ if (Module *Mod = lookupModuleQualified(ModuleName, Parent))
+ return Mod;
+
+ FileManager &FileMgr = SourceMgr.getFileManager();
+
+ // If the framework has a parent path from which we're allowed to infer
+ // a framework module, do so.
+ const FileEntry *ModuleMapFile = nullptr;
+ if (!Parent) {
+ // Determine whether we're allowed to infer a module map.
+ bool canInfer = false;
+ if (llvm::sys::path::has_parent_path(FrameworkDirName)) {
+ // Figure out the parent path.
+ StringRef Parent = llvm::sys::path::parent_path(FrameworkDirName);
+ if (auto ParentDir = FileMgr.getDirectory(Parent)) {
+ // Check whether we have already looked into the parent directory
+ // for a module map.
+ llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
+ inferred = InferredDirectories.find(*ParentDir);
+ if (inferred == InferredDirectories.end()) {
+ // We haven't looked here before. Load a module map, if there is
+ // one.
+ bool IsFrameworkDir = Parent.endswith(".framework");
+ if (const FileEntry *ModMapFile =
+ HeaderInfo.lookupModuleMapFile(*ParentDir, IsFrameworkDir)) {
+ parseModuleMapFile(ModMapFile, Attrs.IsSystem, *ParentDir);
+ inferred = InferredDirectories.find(*ParentDir);
+ }
+
+ if (inferred == InferredDirectories.end())
+ inferred = InferredDirectories.insert(
+ std::make_pair(*ParentDir, InferredDirectory())).first;
+ }
+
+ if (inferred->second.InferModules) {
+ // We're allowed to infer for this directory, but make sure it's okay
+ // to infer this particular module.
+ StringRef Name = llvm::sys::path::stem(FrameworkDirName);
+ canInfer = std::find(inferred->second.ExcludedModules.begin(),
+ inferred->second.ExcludedModules.end(),
+ Name) == inferred->second.ExcludedModules.end();
+
+ Attrs.IsSystem |= inferred->second.Attrs.IsSystem;
+ Attrs.IsExternC |= inferred->second.Attrs.IsExternC;
+ Attrs.IsExhaustive |= inferred->second.Attrs.IsExhaustive;
+ Attrs.NoUndeclaredIncludes |=
+ inferred->second.Attrs.NoUndeclaredIncludes;
+ ModuleMapFile = inferred->second.ModuleMapFile;
+ }
+ }
+ }
+
+ // If we're not allowed to infer a framework module, don't.
+ if (!canInfer)
+ return nullptr;
+ } else
+ ModuleMapFile = getModuleMapFileForUniquing(Parent);
+
+
+ // Look for an umbrella header.
+ SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName());
+ llvm::sys::path::append(UmbrellaName, "Headers", ModuleName + ".h");
+ auto UmbrellaHeader = FileMgr.getFile(UmbrellaName);
+
+ // FIXME: If there's no umbrella header, we could probably scan the
+ // framework to load *everything*. But, it's not clear that this is a good
+ // idea.
+ if (!UmbrellaHeader)
+ return nullptr;
+
+ Module *Result = new Module(ModuleName, SourceLocation(), Parent,
+ /*IsFramework=*/true, /*IsExplicit=*/false,
+ NumCreatedModules++);
+ InferredModuleAllowedBy[Result] = ModuleMapFile;
+ Result->IsInferred = true;
+ if (!Parent) {
+ if (LangOpts.CurrentModule == ModuleName)
+ SourceModule = Result;
+ Modules[ModuleName] = Result;
+ ModuleScopeIDs[Result] = CurrentModuleScopeID;
+ }
+
+ Result->IsSystem |= Attrs.IsSystem;
+ Result->IsExternC |= Attrs.IsExternC;
+ Result->ConfigMacrosExhaustive |= Attrs.IsExhaustive;
+ Result->NoUndeclaredIncludes |= Attrs.NoUndeclaredIncludes;
+ Result->Directory = FrameworkDir;
+
+ // umbrella header "umbrella-header-name"
+ //
+ // The "Headers/" component of the name is implied because this is
+ // a framework module.
+ setUmbrellaHeader(Result, *UmbrellaHeader, ModuleName + ".h");
+
+ // export *
+ Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+
+ // module * { export * }
+ Result->InferSubmodules = true;
+ Result->InferExportWildcard = true;
+
+ // Look for subframeworks.
+ std::error_code EC;
+ SmallString<128> SubframeworksDirName
+ = StringRef(FrameworkDir->getName());
+ llvm::sys::path::append(SubframeworksDirName, "Frameworks");
+ llvm::sys::path::native(SubframeworksDirName);
+ llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+ for (llvm::vfs::directory_iterator
+ Dir = FS.dir_begin(SubframeworksDirName, EC),
+ DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ if (!StringRef(Dir->path()).endswith(".framework"))
+ continue;
+
+ if (auto SubframeworkDir =
+ FileMgr.getDirectory(Dir->path())) {
+ // Note: as an egregious but useful hack, we use the real path here and
+ // check whether it is actually a subdirectory of the parent directory.
+ // This will not be the case if the 'subframework' is actually a symlink
+ // out to a top-level framework.
+ StringRef SubframeworkDirName =
+ FileMgr.getCanonicalName(*SubframeworkDir);
+ bool FoundParent = false;
+ do {
+ // Get the parent directory name.
+ SubframeworkDirName
+ = llvm::sys::path::parent_path(SubframeworkDirName);
+ if (SubframeworkDirName.empty())
+ break;
+
+ if (auto SubDir = FileMgr.getDirectory(SubframeworkDirName)) {
+ if (*SubDir == FrameworkDir) {
+ FoundParent = true;
+ break;
+ }
+ }
+ } while (true);
+
+ if (!FoundParent)
+ continue;
+
+ // FIXME: Do we want to warn about subframeworks without umbrella headers?
+ inferFrameworkModule(*SubframeworkDir, Attrs, Result);
+ }
+ }
+
+ // If the module is a top-level framework, automatically link against the
+ // framework.
+ if (!Result->isSubFramework()) {
+ inferFrameworkLink(Result, FrameworkDir, FileMgr);
+ }
+
+ return Result;
+}
+
+Module *ModuleMap::createShadowedModule(StringRef Name, bool IsFramework,
+ Module *ShadowingModule) {
+
+ // Create a new module with this name.
+ Module *Result =
+ new Module(Name, SourceLocation(), /*Parent=*/nullptr, IsFramework,
+ /*IsExplicit=*/false, NumCreatedModules++);
+ Result->ShadowingModule = ShadowingModule;
+ Result->IsAvailable = false;
+ ModuleScopeIDs[Result] = CurrentModuleScopeID;
+ ShadowModules.push_back(Result);
+
+ return Result;
+}
+
+void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
+ Twine NameAsWritten) {
+ Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader));
+ Mod->Umbrella = UmbrellaHeader;
+ Mod->UmbrellaAsWritten = NameAsWritten.str();
+ UmbrellaDirs[UmbrellaHeader->getDir()] = Mod;
+
+ // Notify callbacks that we just added a new header.
+ for (const auto &Cb : Callbacks)
+ Cb->moduleMapAddUmbrellaHeader(&SourceMgr.getFileManager(), UmbrellaHeader);
+}
+
+void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir,
+ Twine NameAsWritten) {
+ Mod->Umbrella = UmbrellaDir;
+ Mod->UmbrellaAsWritten = NameAsWritten.str();
+ UmbrellaDirs[UmbrellaDir] = Mod;
+}
+
+void ModuleMap::addUnresolvedHeader(Module *Mod,
+ Module::UnresolvedHeaderDirective Header,
+ bool &NeedsFramework) {
+ // If there is a builtin counterpart to this file, add it now so it can
+ // wrap the system header.
+ if (resolveAsBuiltinHeader(Mod, Header)) {
+ // If we have both a builtin and system version of the file, the
+ // builtin version may want to inject macros into the system header, so
+ // force the system header to be treated as a textual header in this
+ // case.
+ Header.Kind = headerRoleToKind(ModuleMap::ModuleHeaderRole(
+ headerKindToRole(Header.Kind) | ModuleMap::TextualHeader));
+ Header.HasBuiltinHeader = true;
+ }
+
+ // If possible, don't stat the header until we need to. This requires the
+ // user to have provided us with some stat information about the file.
+ // FIXME: Add support for lazily stat'ing umbrella headers and excluded
+ // headers.
+ if ((Header.Size || Header.ModTime) && !Header.IsUmbrella &&
+ Header.Kind != Module::HK_Excluded) {
+ // We expect more variation in mtime than size, so if we're given both,
+ // use the mtime as the key.
+ if (Header.ModTime)
+ LazyHeadersByModTime[*Header.ModTime].push_back(Mod);
+ else
+ LazyHeadersBySize[*Header.Size].push_back(Mod);
+ Mod->UnresolvedHeaders.push_back(Header);
+ return;
+ }
+
+ // We don't have stat information or can't defer looking this file up.
+ // Perform the lookup now.
+ resolveHeader(Mod, Header, NeedsFramework);
+}
+
+void ModuleMap::resolveHeaderDirectives(const FileEntry *File) const {
+ auto BySize = LazyHeadersBySize.find(File->getSize());
+ if (BySize != LazyHeadersBySize.end()) {
+ for (auto *M : BySize->second)
+ resolveHeaderDirectives(M);
+ LazyHeadersBySize.erase(BySize);
+ }
+
+ auto ByModTime = LazyHeadersByModTime.find(File->getModificationTime());
+ if (ByModTime != LazyHeadersByModTime.end()) {
+ for (auto *M : ByModTime->second)
+ resolveHeaderDirectives(M);
+ LazyHeadersByModTime.erase(ByModTime);
+ }
+}
+
+void ModuleMap::resolveHeaderDirectives(Module *Mod) const {
+ bool NeedsFramework = false;
+ for (auto &Header : Mod->UnresolvedHeaders)
+ // This operation is logically const; we're just changing how we represent
+ // the header information for this file.
+ const_cast<ModuleMap*>(this)->resolveHeader(Mod, Header, NeedsFramework);
+ Mod->UnresolvedHeaders.clear();
+}
+
+void ModuleMap::addHeader(Module *Mod, Module::Header Header,
+ ModuleHeaderRole Role, bool Imported) {
+ KnownHeader KH(Mod, Role);
+
+ // Only add each header to the headers list once.
+ // FIXME: Should we diagnose if a header is listed twice in the
+ // same module definition?
+ auto &HeaderList = Headers[Header.Entry];
+ for (auto H : HeaderList)
+ if (H == KH)
+ return;
+
+ HeaderList.push_back(KH);
+ Mod->Headers[headerRoleToKind(Role)].push_back(Header);
+
+ bool isCompilingModuleHeader =
+ LangOpts.isCompilingModule() && Mod->getTopLevelModule() == SourceModule;
+ if (!Imported || isCompilingModuleHeader) {
+ // When we import HeaderFileInfo, the external source is expected to
+ // set the isModuleHeader flag itself.
+ HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
+ isCompilingModuleHeader);
+ }
+
+ // Notify callbacks that we just added a new header.
+ for (const auto &Cb : Callbacks)
+ Cb->moduleMapAddHeader(Header.Entry->getName());
+}
+
+void ModuleMap::excludeHeader(Module *Mod, Module::Header Header) {
+ // Add this as a known header so we won't implicitly add it to any
+ // umbrella directory module.
+ // FIXME: Should we only exclude it from umbrella modules within the
+ // specified module?
+ (void) Headers[Header.Entry];
+
+ Mod->Headers[Module::HK_Excluded].push_back(std::move(Header));
+}
+
+const FileEntry *
+ModuleMap::getContainingModuleMapFile(const Module *Module) const {
+ if (Module->DefinitionLoc.isInvalid())
+ return nullptr;
+
+ return SourceMgr.getFileEntryForID(
+ SourceMgr.getFileID(Module->DefinitionLoc));
+}
+
+const FileEntry *ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
+ if (M->IsInferred) {
+ assert(InferredModuleAllowedBy.count(M) && "missing inferred module map");
+ return InferredModuleAllowedBy.find(M)->second;
+ }
+ return getContainingModuleMapFile(M);
+}
+
+void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {
+ assert(M->IsInferred && "module not inferred");
+ InferredModuleAllowedBy[M] = ModMap;
+}
+
+LLVM_DUMP_METHOD void ModuleMap::dump() {
+ llvm::errs() << "Modules:";
+ for (llvm::StringMap<Module *>::iterator M = Modules.begin(),
+ MEnd = Modules.end();
+ M != MEnd; ++M)
+ M->getValue()->print(llvm::errs(), 2);
+
+ llvm::errs() << "Headers:";
+ for (HeadersMap::iterator H = Headers.begin(), HEnd = Headers.end();
+ H != HEnd; ++H) {
+ llvm::errs() << " \"" << H->first->getName() << "\" -> ";
+ for (SmallVectorImpl<KnownHeader>::const_iterator I = H->second.begin(),
+ E = H->second.end();
+ I != E; ++I) {
+ if (I != H->second.begin())
+ llvm::errs() << ",";
+ llvm::errs() << I->getModule()->getFullModuleName();
+ }
+ llvm::errs() << "\n";
+ }
+}
+
+bool ModuleMap::resolveExports(Module *Mod, bool Complain) {
+ auto Unresolved = std::move(Mod->UnresolvedExports);
+ Mod->UnresolvedExports.clear();
+ for (auto &UE : Unresolved) {
+ Module::ExportDecl Export = resolveExport(Mod, UE, Complain);
+ if (Export.getPointer() || Export.getInt())
+ Mod->Exports.push_back(Export);
+ else
+ Mod->UnresolvedExports.push_back(UE);
+ }
+ return !Mod->UnresolvedExports.empty();
+}
+
+bool ModuleMap::resolveUses(Module *Mod, bool Complain) {
+ auto Unresolved = std::move(Mod->UnresolvedDirectUses);
+ Mod->UnresolvedDirectUses.clear();
+ for (auto &UDU : Unresolved) {
+ Module *DirectUse = resolveModuleId(UDU, Mod, Complain);
+ if (DirectUse)
+ Mod->DirectUses.push_back(DirectUse);
+ else
+ Mod->UnresolvedDirectUses.push_back(UDU);
+ }
+ return !Mod->UnresolvedDirectUses.empty();
+}
+
+bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
+ auto Unresolved = std::move(Mod->UnresolvedConflicts);
+ Mod->UnresolvedConflicts.clear();
+ for (auto &UC : Unresolved) {
+ if (Module *OtherMod = resolveModuleId(UC.Id, Mod, Complain)) {
+ Module::Conflict Conflict;
+ Conflict.Other = OtherMod;
+ Conflict.Message = UC.Message;
+ Mod->Conflicts.push_back(Conflict);
+ } else
+ Mod->UnresolvedConflicts.push_back(UC);
+ }
+ return !Mod->UnresolvedConflicts.empty();
+}
+
+//----------------------------------------------------------------------------//
+// Module map file parser
+//----------------------------------------------------------------------------//
+
+namespace clang {
+
+ /// A token in a module map file.
+ struct MMToken {
+ enum TokenKind {
+ Comma,
+ ConfigMacros,
+ Conflict,
+ EndOfFile,
+ HeaderKeyword,
+ Identifier,
+ Exclaim,
+ ExcludeKeyword,
+ ExplicitKeyword,
+ ExportKeyword,
+ ExportAsKeyword,
+ ExternKeyword,
+ FrameworkKeyword,
+ LinkKeyword,
+ ModuleKeyword,
+ Period,
+ PrivateKeyword,
+ UmbrellaKeyword,
+ UseKeyword,
+ RequiresKeyword,
+ Star,
+ StringLiteral,
+ IntegerLiteral,
+ TextualKeyword,
+ LBrace,
+ RBrace,
+ LSquare,
+ RSquare
+ } Kind;
+
+ unsigned Location;
+ unsigned StringLength;
+ union {
+ // If Kind != IntegerLiteral.
+ const char *StringData;
+
+ // If Kind == IntegerLiteral.
+ uint64_t IntegerValue;
+ };
+
+ void clear() {
+ Kind = EndOfFile;
+ Location = 0;
+ StringLength = 0;
+ StringData = nullptr;
+ }
+
+ bool is(TokenKind K) const { return Kind == K; }
+
+ SourceLocation getLocation() const {
+ return SourceLocation::getFromRawEncoding(Location);
+ }
+
+ uint64_t getInteger() const {
+ return Kind == IntegerLiteral ? IntegerValue : 0;
+ }
+
+ StringRef getString() const {
+ return Kind == IntegerLiteral ? StringRef()
+ : StringRef(StringData, StringLength);
+ }
+ };
+
+ class ModuleMapParser {
+ Lexer &L;
+ SourceManager &SourceMgr;
+
+ /// Default target information, used only for string literal
+ /// parsing.
+ const TargetInfo *Target;
+
+ DiagnosticsEngine &Diags;
+ ModuleMap &Map;
+
+ /// The current module map file.
+ const FileEntry *ModuleMapFile;
+
+ /// Source location of most recent parsed module declaration
+ SourceLocation CurrModuleDeclLoc;
+
+ /// The directory that file names in this module map file should
+ /// be resolved relative to.
+ const DirectoryEntry *Directory;
+
+ /// Whether this module map is in a system header directory.
+ bool IsSystem;
+
+ /// Whether an error occurred.
+ bool HadError = false;
+
+ /// Stores string data for the various string literals referenced
+ /// during parsing.
+ llvm::BumpPtrAllocator StringData;
+
+ /// The current token.
+ MMToken Tok;
+
+ /// The active module.
+ Module *ActiveModule = nullptr;
+
+ /// Whether a module uses the 'requires excluded' hack to mark its
+ /// contents as 'textual'.
+ ///
+ /// On older Darwin SDK versions, 'requires excluded' is used to mark the
+ /// contents of the Darwin.C.excluded (assert.h) and Tcl.Private modules as
+ /// non-modular headers. For backwards compatibility, we continue to
+ /// support this idiom for just these modules, and map the headers to
+ /// 'textual' to match the original intent.
+ llvm::SmallPtrSet<Module *, 2> UsesRequiresExcludedHack;
+
+ /// Consume the current token and return its location.
+ SourceLocation consumeToken();
+
+ /// Skip tokens until we reach the a token with the given kind
+ /// (or the end of the file).
+ void skipUntil(MMToken::TokenKind K);
+
+ using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
+
+ bool parseModuleId(ModuleId &Id);
+ void parseModuleDecl();
+ void parseExternModuleDecl();
+ void parseRequiresDecl();
+ void parseHeaderDecl(MMToken::TokenKind, SourceLocation LeadingLoc);
+ void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc);
+ void parseExportDecl();
+ void parseExportAsDecl();
+ void parseUseDecl();
+ void parseLinkDecl();
+ void parseConfigMacros();
+ void parseConflict();
+ void parseInferredModuleDecl(bool Framework, bool Explicit);
+
+ /// Private modules are canonicalized as Foo_Private. Clang provides extra
+ /// module map search logic to find the appropriate private module when PCH
+ /// is used with implicit module maps. Warn when private modules are written
+ /// in other ways (FooPrivate and Foo.Private), providing notes and fixits.
+ void diagnosePrivateModules(SourceLocation ExplicitLoc,
+ SourceLocation FrameworkLoc);
+
+ using Attributes = ModuleMap::Attributes;
+
+ bool parseOptionalAttributes(Attributes &Attrs);
+
+ public:
+ explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr,
+ const TargetInfo *Target, DiagnosticsEngine &Diags,
+ ModuleMap &Map, const FileEntry *ModuleMapFile,
+ const DirectoryEntry *Directory, bool IsSystem)
+ : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map),
+ ModuleMapFile(ModuleMapFile), Directory(Directory),
+ IsSystem(IsSystem) {
+ Tok.clear();
+ consumeToken();
+ }
+
+ bool parseModuleMapFile();
+
+ bool terminatedByDirective() { return false; }
+ SourceLocation getLocation() { return Tok.getLocation(); }
+ };
+
+} // namespace clang
+
+SourceLocation ModuleMapParser::consumeToken() {
+ SourceLocation Result = Tok.getLocation();
+
+retry:
+ Tok.clear();
+ Token LToken;
+ L.LexFromRawLexer(LToken);
+ Tok.Location = LToken.getLocation().getRawEncoding();
+ switch (LToken.getKind()) {
+ case tok::raw_identifier: {
+ StringRef RI = LToken.getRawIdentifier();
+ Tok.StringData = RI.data();
+ Tok.StringLength = RI.size();
+ Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(RI)
+ .Case("config_macros", MMToken::ConfigMacros)
+ .Case("conflict", MMToken::Conflict)
+ .Case("exclude", MMToken::ExcludeKeyword)
+ .Case("explicit", MMToken::ExplicitKeyword)
+ .Case("export", MMToken::ExportKeyword)
+ .Case("export_as", MMToken::ExportAsKeyword)
+ .Case("extern", MMToken::ExternKeyword)
+ .Case("framework", MMToken::FrameworkKeyword)
+ .Case("header", MMToken::HeaderKeyword)
+ .Case("link", MMToken::LinkKeyword)
+ .Case("module", MMToken::ModuleKeyword)
+ .Case("private", MMToken::PrivateKeyword)
+ .Case("requires", MMToken::RequiresKeyword)
+ .Case("textual", MMToken::TextualKeyword)
+ .Case("umbrella", MMToken::UmbrellaKeyword)
+ .Case("use", MMToken::UseKeyword)
+ .Default(MMToken::Identifier);
+ break;
+ }
+
+ case tok::comma:
+ Tok.Kind = MMToken::Comma;
+ break;
+
+ case tok::eof:
+ Tok.Kind = MMToken::EndOfFile;
+ break;
+
+ case tok::l_brace:
+ Tok.Kind = MMToken::LBrace;
+ break;
+
+ case tok::l_square:
+ Tok.Kind = MMToken::LSquare;
+ break;
+
+ case tok::period:
+ Tok.Kind = MMToken::Period;
+ break;
+
+ case tok::r_brace:
+ Tok.Kind = MMToken::RBrace;
+ break;
+
+ case tok::r_square:
+ Tok.Kind = MMToken::RSquare;
+ break;
+
+ case tok::star:
+ Tok.Kind = MMToken::Star;
+ break;
+
+ case tok::exclaim:
+ Tok.Kind = MMToken::Exclaim;
+ break;
+
+ case tok::string_literal: {
+ if (LToken.hasUDSuffix()) {
+ Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl);
+ HadError = true;
+ goto retry;
+ }
+
+ // Parse the string literal.
+ LangOptions LangOpts;
+ StringLiteralParser StringLiteral(LToken, SourceMgr, LangOpts, *Target);
+ if (StringLiteral.hadError)
+ goto retry;
+
+ // Copy the string literal into our string data allocator.
+ unsigned Length = StringLiteral.GetStringLength();
+ char *Saved = StringData.Allocate<char>(Length + 1);
+ memcpy(Saved, StringLiteral.GetString().data(), Length);
+ Saved[Length] = 0;
+
+ // Form the token.
+ Tok.Kind = MMToken::StringLiteral;
+ Tok.StringData = Saved;
+ Tok.StringLength = Length;
+ break;
+ }
+
+ case tok::numeric_constant: {
+ // We don't support any suffixes or other complications.
+ SmallString<32> SpellingBuffer;
+ SpellingBuffer.resize(LToken.getLength() + 1);
+ const char *Start = SpellingBuffer.data();
+ unsigned Length =
+ Lexer::getSpelling(LToken, Start, SourceMgr, L.getLangOpts());
+ uint64_t Value;
+ if (StringRef(Start, Length).getAsInteger(0, Value)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token);
+ HadError = true;
+ goto retry;
+ }
+
+ Tok.Kind = MMToken::IntegerLiteral;
+ Tok.IntegerValue = Value;
+ break;
+ }
+
+ case tok::comment:
+ goto retry;
+
+ case tok::hash:
+ // A module map can be terminated prematurely by
+ // #pragma clang module contents
+ // When building the module, we'll treat the rest of the file as the
+ // contents of the module.
+ {
+ auto NextIsIdent = [&](StringRef Str) -> bool {
+ L.LexFromRawLexer(LToken);
+ return !LToken.isAtStartOfLine() && LToken.is(tok::raw_identifier) &&
+ LToken.getRawIdentifier() == Str;
+ };
+ if (NextIsIdent("pragma") && NextIsIdent("clang") &&
+ NextIsIdent("module") && NextIsIdent("contents")) {
+ Tok.Kind = MMToken::EndOfFile;
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+
+ default:
+ Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token);
+ HadError = true;
+ goto retry;
+ }
+
+ return Result;
+}
+
+void ModuleMapParser::skipUntil(MMToken::TokenKind K) {
+ unsigned braceDepth = 0;
+ unsigned squareDepth = 0;
+ do {
+ switch (Tok.Kind) {
+ case MMToken::EndOfFile:
+ return;
+
+ case MMToken::LBrace:
+ if (Tok.is(K) && braceDepth == 0 && squareDepth == 0)
+ return;
+
+ ++braceDepth;
+ break;
+
+ case MMToken::LSquare:
+ if (Tok.is(K) && braceDepth == 0 && squareDepth == 0)
+ return;
+
+ ++squareDepth;
+ break;
+
+ case MMToken::RBrace:
+ if (braceDepth > 0)
+ --braceDepth;
+ else if (Tok.is(K))
+ return;
+ break;
+
+ case MMToken::RSquare:
+ if (squareDepth > 0)
+ --squareDepth;
+ else if (Tok.is(K))
+ return;
+ break;
+
+ default:
+ if (braceDepth == 0 && squareDepth == 0 && Tok.is(K))
+ return;
+ break;
+ }
+
+ consumeToken();
+ } while (true);
+}
+
+/// Parse a module-id.
+///
+/// module-id:
+/// identifier
+/// identifier '.' module-id
+///
+/// \returns true if an error occurred, false otherwise.
+bool ModuleMapParser::parseModuleId(ModuleId &Id) {
+ Id.clear();
+ do {
+ if (Tok.is(MMToken::Identifier) || Tok.is(MMToken::StringLiteral)) {
+ Id.push_back(std::make_pair(Tok.getString(), Tok.getLocation()));
+ consumeToken();
+ } else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module_name);
+ return true;
+ }
+
+ if (!Tok.is(MMToken::Period))
+ break;
+
+ consumeToken();
+ } while (true);
+
+ return false;
+}
+
+namespace {
+
+ /// Enumerates the known attributes.
+ enum AttributeKind {
+ /// An unknown attribute.
+ AT_unknown,
+
+ /// The 'system' attribute.
+ AT_system,
+
+ /// The 'extern_c' attribute.
+ AT_extern_c,
+
+ /// The 'exhaustive' attribute.
+ AT_exhaustive,
+
+ /// The 'no_undeclared_includes' attribute.
+ AT_no_undeclared_includes
+ };
+
+} // namespace
+
+/// Private modules are canonicalized as Foo_Private. Clang provides extra
+/// module map search logic to find the appropriate private module when PCH
+/// is used with implicit module maps. Warn when private modules are written
+/// in other ways (FooPrivate and Foo.Private), providing notes and fixits.
+void ModuleMapParser::diagnosePrivateModules(SourceLocation ExplicitLoc,
+ SourceLocation FrameworkLoc) {
+ auto GenNoteAndFixIt = [&](StringRef BadName, StringRef Canonical,
+ const Module *M, SourceRange ReplLoc) {
+ auto D = Diags.Report(ActiveModule->DefinitionLoc,
+ diag::note_mmap_rename_top_level_private_module);
+ D << BadName << M->Name;
+ D << FixItHint::CreateReplacement(ReplLoc, Canonical);
+ };
+
+ for (auto E = Map.module_begin(); E != Map.module_end(); ++E) {
+ auto const *M = E->getValue();
+ if (M->Directory != ActiveModule->Directory)
+ continue;
+
+ SmallString<128> FullName(ActiveModule->getFullModuleName());
+ if (!FullName.startswith(M->Name) && !FullName.endswith("Private"))
+ continue;
+ SmallString<128> FixedPrivModDecl;
+ SmallString<128> Canonical(M->Name);
+ Canonical.append("_Private");
+
+ // Foo.Private -> Foo_Private
+ if (ActiveModule->Parent && ActiveModule->Name == "Private" && !M->Parent &&
+ M->Name == ActiveModule->Parent->Name) {
+ Diags.Report(ActiveModule->DefinitionLoc,
+ diag::warn_mmap_mismatched_private_submodule)
+ << FullName;
+
+ SourceLocation FixItInitBegin = CurrModuleDeclLoc;
+ if (FrameworkLoc.isValid())
+ FixItInitBegin = FrameworkLoc;
+ if (ExplicitLoc.isValid())
+ FixItInitBegin = ExplicitLoc;
+
+ if (FrameworkLoc.isValid() || ActiveModule->Parent->IsFramework)
+ FixedPrivModDecl.append("framework ");
+ FixedPrivModDecl.append("module ");
+ FixedPrivModDecl.append(Canonical);
+
+ GenNoteAndFixIt(FullName, FixedPrivModDecl, M,
+ SourceRange(FixItInitBegin, ActiveModule->DefinitionLoc));
+ continue;
+ }
+
+ // FooPrivate and whatnots -> Foo_Private
+ if (!ActiveModule->Parent && !M->Parent && M->Name != ActiveModule->Name &&
+ ActiveModule->Name != Canonical) {
+ Diags.Report(ActiveModule->DefinitionLoc,
+ diag::warn_mmap_mismatched_private_module_name)
+ << ActiveModule->Name;
+ GenNoteAndFixIt(ActiveModule->Name, Canonical, M,
+ SourceRange(ActiveModule->DefinitionLoc));
+ }
+ }
+}
+
+/// Parse a module declaration.
+///
+/// module-declaration:
+/// 'extern' 'module' module-id string-literal
+/// 'explicit'[opt] 'framework'[opt] 'module' module-id attributes[opt]
+/// { module-member* }
+///
+/// module-member:
+/// requires-declaration
+/// header-declaration
+/// submodule-declaration
+/// export-declaration
+/// export-as-declaration
+/// link-declaration
+///
+/// submodule-declaration:
+/// module-declaration
+/// inferred-submodule-declaration
+void ModuleMapParser::parseModuleDecl() {
+ assert(Tok.is(MMToken::ExplicitKeyword) || Tok.is(MMToken::ModuleKeyword) ||
+ Tok.is(MMToken::FrameworkKeyword) || Tok.is(MMToken::ExternKeyword));
+ if (Tok.is(MMToken::ExternKeyword)) {
+ parseExternModuleDecl();
+ return;
+ }
+
+ // Parse 'explicit' or 'framework' keyword, if present.
+ SourceLocation ExplicitLoc;
+ SourceLocation FrameworkLoc;
+ bool Explicit = false;
+ bool Framework = false;
+
+ // Parse 'explicit' keyword, if present.
+ if (Tok.is(MMToken::ExplicitKeyword)) {
+ ExplicitLoc = consumeToken();
+ Explicit = true;
+ }
+
+ // Parse 'framework' keyword, if present.
+ if (Tok.is(MMToken::FrameworkKeyword)) {
+ FrameworkLoc = consumeToken();
+ Framework = true;
+ }
+
+ // Parse 'module' keyword.
+ if (!Tok.is(MMToken::ModuleKeyword)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
+ consumeToken();
+ HadError = true;
+ return;
+ }
+ CurrModuleDeclLoc = consumeToken(); // 'module' keyword
+
+ // If we have a wildcard for the module name, this is an inferred submodule.
+ // Parse it.
+ if (Tok.is(MMToken::Star))
+ return parseInferredModuleDecl(Framework, Explicit);
+
+ // Parse the module name.
+ ModuleId Id;
+ if (parseModuleId(Id)) {
+ HadError = true;
+ return;
+ }
+
+ if (ActiveModule) {
+ if (Id.size() > 1) {
+ Diags.Report(Id.front().second, diag::err_mmap_nested_submodule_id)
+ << SourceRange(Id.front().second, Id.back().second);
+
+ HadError = true;
+ return;
+ }
+ } else if (Id.size() == 1 && Explicit) {
+ // Top-level modules can't be explicit.
+ Diags.Report(ExplicitLoc, diag::err_mmap_explicit_top_level);
+ Explicit = false;
+ ExplicitLoc = SourceLocation();
+ HadError = true;
+ }
+
+ Module *PreviousActiveModule = ActiveModule;
+ if (Id.size() > 1) {
+ // This module map defines a submodule. Go find the module of which it
+ // is a submodule.
+ ActiveModule = nullptr;
+ const Module *TopLevelModule = nullptr;
+ for (unsigned I = 0, N = Id.size() - 1; I != N; ++I) {
+ if (Module *Next = Map.lookupModuleQualified(Id[I].first, ActiveModule)) {
+ if (I == 0)
+ TopLevelModule = Next;
+ ActiveModule = Next;
+ continue;
+ }
+
+ if (ActiveModule) {
+ Diags.Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+ << Id[I].first
+ << ActiveModule->getTopLevelModule()->getFullModuleName();
+ } else {
+ Diags.Report(Id[I].second, diag::err_mmap_expected_module_name);
+ }
+ HadError = true;
+ return;
+ }
+
+ if (ModuleMapFile != Map.getContainingModuleMapFile(TopLevelModule)) {
+ assert(ModuleMapFile != Map.getModuleMapFileForUniquing(TopLevelModule) &&
+ "submodule defined in same file as 'module *' that allowed its "
+ "top-level module");
+ Map.addAdditionalModuleMapFile(TopLevelModule, ModuleMapFile);
+ }
+ }
+
+ StringRef ModuleName = Id.back().first;
+ SourceLocation ModuleNameLoc = Id.back().second;
+
+ // Parse the optional attribute list.
+ Attributes Attrs;
+ if (parseOptionalAttributes(Attrs))
+ return;
+
+ // Parse the opening brace.
+ if (!Tok.is(MMToken::LBrace)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace)
+ << ModuleName;
+ HadError = true;
+ return;
+ }
+ SourceLocation LBraceLoc = consumeToken();
+
+ // Determine whether this (sub)module has already been defined.
+ Module *ShadowingModule = nullptr;
+ if (Module *Existing = Map.lookupModuleQualified(ModuleName, ActiveModule)) {
+ // We might see a (re)definition of a module that we already have a
+ // definition for in two cases:
+ // - If we loaded one definition from an AST file and we've just found a
+ // corresponding definition in a module map file, or
+ bool LoadedFromASTFile = Existing->DefinitionLoc.isInvalid();
+ // - If we're building a (preprocessed) module and we've just loaded the
+ // module map file from which it was created.
+ bool ParsedAsMainInput =
+ Map.LangOpts.getCompilingModule() == LangOptions::CMK_ModuleMap &&
+ Map.LangOpts.CurrentModule == ModuleName &&
+ SourceMgr.getDecomposedLoc(ModuleNameLoc).first !=
+ SourceMgr.getDecomposedLoc(Existing->DefinitionLoc).first;
+ if (!ActiveModule && (LoadedFromASTFile || ParsedAsMainInput)) {
+ // Skip the module definition.
+ skipUntil(MMToken::RBrace);
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+ else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+ Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+ HadError = true;
+ }
+ return;
+ }
+
+ if (!Existing->Parent && Map.mayShadowNewModule(Existing)) {
+ ShadowingModule = Existing;
+ } else {
+ // This is not a shawdowed module decl, it is an illegal redefinition.
+ Diags.Report(ModuleNameLoc, diag::err_mmap_module_redefinition)
+ << ModuleName;
+ Diags.Report(Existing->DefinitionLoc, diag::note_mmap_prev_definition);
+
+ // Skip the module definition.
+ skipUntil(MMToken::RBrace);
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+
+ HadError = true;
+ return;
+ }
+ }
+
+ // Start defining this module.
+ if (ShadowingModule) {
+ ActiveModule =
+ Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
+ } else {
+ ActiveModule =
+ Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
+ .first;
+ }
+
+ ActiveModule->DefinitionLoc = ModuleNameLoc;
+ if (Attrs.IsSystem || IsSystem)
+ ActiveModule->IsSystem = true;
+ if (Attrs.IsExternC)
+ ActiveModule->IsExternC = true;
+ if (Attrs.NoUndeclaredIncludes ||
+ (!ActiveModule->Parent && ModuleName == "Darwin"))
+ ActiveModule->NoUndeclaredIncludes = true;
+ ActiveModule->Directory = Directory;
+
+ StringRef MapFileName(ModuleMapFile->getName());
+ if (MapFileName.endswith("module.private.modulemap") ||
+ MapFileName.endswith("module_private.map")) {
+ ActiveModule->ModuleMapIsPrivate = true;
+ }
+
+ // Private modules named as FooPrivate, Foo.Private or similar are likely a
+ // user error; provide warnings, notes and fixits to direct users to use
+ // Foo_Private instead.
+ SourceLocation StartLoc =
+ SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
+ if (Map.HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
+ !Diags.isIgnored(diag::warn_mmap_mismatched_private_submodule,
+ StartLoc) &&
+ !Diags.isIgnored(diag::warn_mmap_mismatched_private_module_name,
+ StartLoc) &&
+ ActiveModule->ModuleMapIsPrivate)
+ diagnosePrivateModules(ExplicitLoc, FrameworkLoc);
+
+ bool Done = false;
+ do {
+ switch (Tok.Kind) {
+ case MMToken::EndOfFile:
+ case MMToken::RBrace:
+ Done = true;
+ break;
+
+ case MMToken::ConfigMacros:
+ parseConfigMacros();
+ break;
+
+ case MMToken::Conflict:
+ parseConflict();
+ break;
+
+ case MMToken::ExplicitKeyword:
+ case MMToken::ExternKeyword:
+ case MMToken::FrameworkKeyword:
+ case MMToken::ModuleKeyword:
+ parseModuleDecl();
+ break;
+
+ case MMToken::ExportKeyword:
+ parseExportDecl();
+ break;
+
+ case MMToken::ExportAsKeyword:
+ parseExportAsDecl();
+ break;
+
+ case MMToken::UseKeyword:
+ parseUseDecl();
+ break;
+
+ case MMToken::RequiresKeyword:
+ parseRequiresDecl();
+ break;
+
+ case MMToken::TextualKeyword:
+ parseHeaderDecl(MMToken::TextualKeyword, consumeToken());
+ break;
+
+ case MMToken::UmbrellaKeyword: {
+ SourceLocation UmbrellaLoc = consumeToken();
+ if (Tok.is(MMToken::HeaderKeyword))
+ parseHeaderDecl(MMToken::UmbrellaKeyword, UmbrellaLoc);
+ else
+ parseUmbrellaDirDecl(UmbrellaLoc);
+ break;
+ }
+
+ case MMToken::ExcludeKeyword:
+ parseHeaderDecl(MMToken::ExcludeKeyword, consumeToken());
+ break;
+
+ case MMToken::PrivateKeyword:
+ parseHeaderDecl(MMToken::PrivateKeyword, consumeToken());
+ break;
+
+ case MMToken::HeaderKeyword:
+ parseHeaderDecl(MMToken::HeaderKeyword, consumeToken());
+ break;
+
+ case MMToken::LinkKeyword:
+ parseLinkDecl();
+ break;
+
+ default:
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member);
+ consumeToken();
+ break;
+ }
+ } while (!Done);
+
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+ else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+ Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+ HadError = true;
+ }
+
+ // If the active module is a top-level framework, and there are no link
+ // libraries, automatically link against the framework.
+ if (ActiveModule->IsFramework && !ActiveModule->isSubFramework() &&
+ ActiveModule->LinkLibraries.empty()) {
+ inferFrameworkLink(ActiveModule, Directory, SourceMgr.getFileManager());
+ }
+
+ // If the module meets all requirements but is still unavailable, mark the
+ // whole tree as unavailable to prevent it from building.
+ if (!ActiveModule->IsAvailable && !ActiveModule->IsMissingRequirement &&
+ ActiveModule->Parent) {
+ ActiveModule->getTopLevelModule()->markUnavailable();
+ ActiveModule->getTopLevelModule()->MissingHeaders.append(
+ ActiveModule->MissingHeaders.begin(), ActiveModule->MissingHeaders.end());
+ }
+
+ // We're done parsing this module. Pop back to the previous module.
+ ActiveModule = PreviousActiveModule;
+}
+
+/// Parse an extern module declaration.
+///
+/// extern module-declaration:
+/// 'extern' 'module' module-id string-literal
+void ModuleMapParser::parseExternModuleDecl() {
+ assert(Tok.is(MMToken::ExternKeyword));
+ SourceLocation ExternLoc = consumeToken(); // 'extern' keyword
+
+ // Parse 'module' keyword.
+ if (!Tok.is(MMToken::ModuleKeyword)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
+ consumeToken();
+ HadError = true;
+ return;
+ }
+ consumeToken(); // 'module' keyword
+
+ // Parse the module name.
+ ModuleId Id;
+ if (parseModuleId(Id)) {
+ HadError = true;
+ return;
+ }
+
+ // Parse the referenced module map file name.
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_mmap_file);
+ HadError = true;
+ return;
+ }
+ std::string FileName = Tok.getString();
+ consumeToken(); // filename
+
+ StringRef FileNameRef = FileName;
+ SmallString<128> ModuleMapFileName;
+ if (llvm::sys::path::is_relative(FileNameRef)) {
+ ModuleMapFileName += Directory->getName();
+ llvm::sys::path::append(ModuleMapFileName, FileName);
+ FileNameRef = ModuleMapFileName;
+ }
+ if (auto File = SourceMgr.getFileManager().getFile(FileNameRef))
+ Map.parseModuleMapFile(
+ *File, /*IsSystem=*/false,
+ Map.HeaderInfo.getHeaderSearchOpts().ModuleMapFileHomeIsCwd
+ ? Directory
+ : (*File)->getDir(),
+ FileID(), nullptr, ExternLoc);
+}
+
+/// Whether to add the requirement \p Feature to the module \p M.
+///
+/// This preserves backwards compatibility for two hacks in the Darwin system
+/// module map files:
+///
+/// 1. The use of 'requires excluded' to make headers non-modular, which
+/// should really be mapped to 'textual' now that we have this feature. We
+/// drop the 'excluded' requirement, and set \p IsRequiresExcludedHack to
+/// true. Later, this bit will be used to map all the headers inside this
+/// module to 'textual'.
+///
+/// This affects Darwin.C.excluded (for assert.h) and Tcl.Private.
+///
+/// 2. Removes a bogus cplusplus requirement from IOKit.avc. This requirement
+/// was never correct and causes issues now that we check it, so drop it.
+static bool shouldAddRequirement(Module *M, StringRef Feature,
+ bool &IsRequiresExcludedHack) {
+ if (Feature == "excluded" &&
+ (M->fullModuleNameIs({"Darwin", "C", "excluded"}) ||
+ M->fullModuleNameIs({"Tcl", "Private"}))) {
+ IsRequiresExcludedHack = true;
+ return false;
+ } else if (Feature == "cplusplus" && M->fullModuleNameIs({"IOKit", "avc"})) {
+ return false;
+ }
+
+ return true;
+}
+
+/// Parse a requires declaration.
+///
+/// requires-declaration:
+/// 'requires' feature-list
+///
+/// feature-list:
+/// feature ',' feature-list
+/// feature
+///
+/// feature:
+/// '!'[opt] identifier
+void ModuleMapParser::parseRequiresDecl() {
+ assert(Tok.is(MMToken::RequiresKeyword));
+
+ // Parse 'requires' keyword.
+ consumeToken();
+
+ // Parse the feature-list.
+ do {
+ bool RequiredState = true;
+ if (Tok.is(MMToken::Exclaim)) {
+ RequiredState = false;
+ consumeToken();
+ }
+
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_feature);
+ HadError = true;
+ return;
+ }
+
+ // Consume the feature name.
+ std::string Feature = Tok.getString();
+ consumeToken();
+
+ bool IsRequiresExcludedHack = false;
+ bool ShouldAddRequirement =
+ shouldAddRequirement(ActiveModule, Feature, IsRequiresExcludedHack);
+
+ if (IsRequiresExcludedHack)
+ UsesRequiresExcludedHack.insert(ActiveModule);
+
+ if (ShouldAddRequirement) {
+ // Add this feature.
+ ActiveModule->addRequirement(Feature, RequiredState, Map.LangOpts,
+ *Map.Target);
+ }
+
+ if (!Tok.is(MMToken::Comma))
+ break;
+
+ // Consume the comma.
+ consumeToken();
+ } while (true);
+}
+
+/// Parse a header declaration.
+///
+/// header-declaration:
+/// 'textual'[opt] 'header' string-literal
+/// 'private' 'textual'[opt] 'header' string-literal
+/// 'exclude' 'header' string-literal
+/// 'umbrella' 'header' string-literal
+///
+/// FIXME: Support 'private textual header'.
+void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,
+ SourceLocation LeadingLoc) {
+ // We've already consumed the first token.
+ ModuleMap::ModuleHeaderRole Role = ModuleMap::NormalHeader;
+ if (LeadingToken == MMToken::PrivateKeyword) {
+ Role = ModuleMap::PrivateHeader;
+ // 'private' may optionally be followed by 'textual'.
+ if (Tok.is(MMToken::TextualKeyword)) {
+ LeadingToken = Tok.Kind;
+ consumeToken();
+ }
+ }
+
+ if (LeadingToken == MMToken::TextualKeyword)
+ Role = ModuleMap::ModuleHeaderRole(Role | ModuleMap::TextualHeader);
+
+ if (UsesRequiresExcludedHack.count(ActiveModule)) {
+ // Mark this header 'textual' (see doc comment for
+ // Module::UsesRequiresExcludedHack).
+ Role = ModuleMap::ModuleHeaderRole(Role | ModuleMap::TextualHeader);
+ }
+
+ if (LeadingToken != MMToken::HeaderKeyword) {
+ if (!Tok.is(MMToken::HeaderKeyword)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
+ << (LeadingToken == MMToken::PrivateKeyword ? "private" :
+ LeadingToken == MMToken::ExcludeKeyword ? "exclude" :
+ LeadingToken == MMToken::TextualKeyword ? "textual" : "umbrella");
+ return;
+ }
+ consumeToken();
+ }
+
+ // Parse the header name.
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
+ << "header";
+ HadError = true;
+ return;
+ }
+ Module::UnresolvedHeaderDirective Header;
+ Header.FileName = Tok.getString();
+ Header.FileNameLoc = consumeToken();
+ Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword;
+ Header.Kind =
+ (LeadingToken == MMToken::ExcludeKeyword ? Module::HK_Excluded
+ : Map.headerRoleToKind(Role));
+
+ // Check whether we already have an umbrella.
+ if (Header.IsUmbrella && ActiveModule->Umbrella) {
+ Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
+ << ActiveModule->getFullModuleName();
+ HadError = true;
+ return;
+ }
+
+ // If we were given stat information, parse it so we can skip looking for
+ // the file.
+ if (Tok.is(MMToken::LBrace)) {
+ SourceLocation LBraceLoc = consumeToken();
+
+ while (!Tok.is(MMToken::RBrace) && !Tok.is(MMToken::EndOfFile)) {
+ enum Attribute { Size, ModTime, Unknown };
+ StringRef Str = Tok.getString();
+ SourceLocation Loc = consumeToken();
+ switch (llvm::StringSwitch<Attribute>(Str)
+ .Case("size", Size)
+ .Case("mtime", ModTime)
+ .Default(Unknown)) {
+ case Size:
+ if (Header.Size)
+ Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
+ if (!Tok.is(MMToken::IntegerLiteral)) {
+ Diags.Report(Tok.getLocation(),
+ diag::err_mmap_invalid_header_attribute_value) << Str;
+ skipUntil(MMToken::RBrace);
+ break;
+ }
+ Header.Size = Tok.getInteger();
+ consumeToken();
+ break;
+
+ case ModTime:
+ if (Header.ModTime)
+ Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
+ if (!Tok.is(MMToken::IntegerLiteral)) {
+ Diags.Report(Tok.getLocation(),
+ diag::err_mmap_invalid_header_attribute_value) << Str;
+ skipUntil(MMToken::RBrace);
+ break;
+ }
+ Header.ModTime = Tok.getInteger();
+ consumeToken();
+ break;
+
+ case Unknown:
+ Diags.Report(Loc, diag::err_mmap_expected_header_attribute);
+ skipUntil(MMToken::RBrace);
+ break;
+ }
+ }
+
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+ else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+ Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+ HadError = true;
+ }
+ }
+
+ bool NeedsFramework = false;
+ Map.addUnresolvedHeader(ActiveModule, std::move(Header), NeedsFramework);
+
+ if (NeedsFramework && ActiveModule)
+ Diags.Report(CurrModuleDeclLoc, diag::note_mmap_add_framework_keyword)
+ << ActiveModule->getFullModuleName()
+ << FixItHint::CreateReplacement(CurrModuleDeclLoc, "framework module");
+}
+
+static int compareModuleHeaders(const Module::Header *A,
+ const Module::Header *B) {
+ return A->NameAsWritten.compare(B->NameAsWritten);
+}
+
+/// Parse an umbrella directory declaration.
+///
+/// umbrella-dir-declaration:
+/// umbrella string-literal
+void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) {
+ // Parse the directory name.
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
+ << "umbrella";
+ HadError = true;
+ return;
+ }
+
+ std::string DirName = Tok.getString();
+ SourceLocation DirNameLoc = consumeToken();
+
+ // Check whether we already have an umbrella.
+ if (ActiveModule->Umbrella) {
+ Diags.Report(DirNameLoc, diag::err_mmap_umbrella_clash)
+ << ActiveModule->getFullModuleName();
+ HadError = true;
+ return;
+ }
+
+ // Look for this file.
+ const DirectoryEntry *Dir = nullptr;
+ if (llvm::sys::path::is_absolute(DirName)) {
+ if (auto D = SourceMgr.getFileManager().getDirectory(DirName))
+ Dir = *D;
+ } else {
+ SmallString<128> PathName;
+ PathName = Directory->getName();
+ llvm::sys::path::append(PathName, DirName);
+ if (auto D = SourceMgr.getFileManager().getDirectory(PathName))
+ Dir = *D;
+ }
+
+ if (!Dir) {
+ Diags.Report(DirNameLoc, diag::warn_mmap_umbrella_dir_not_found)
+ << DirName;
+ return;
+ }
+
+ if (UsesRequiresExcludedHack.count(ActiveModule)) {
+ // Mark this header 'textual' (see doc comment for
+ // ModuleMapParser::UsesRequiresExcludedHack). Although iterating over the
+ // directory is relatively expensive, in practice this only applies to the
+ // uncommonly used Tcl module on Darwin platforms.
+ std::error_code EC;
+ SmallVector<Module::Header, 6> Headers;
+ llvm::vfs::FileSystem &FS =
+ SourceMgr.getFileManager().getVirtualFileSystem();
+ for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E;
+ I != E && !EC; I.increment(EC)) {
+ if (auto FE = SourceMgr.getFileManager().getFile(I->path())) {
+
+ Module::Header Header = {I->path(), *FE};
+ Headers.push_back(std::move(Header));
+ }
+ }
+
+ // Sort header paths so that the pcm doesn't depend on iteration order.
+ llvm::array_pod_sort(Headers.begin(), Headers.end(), compareModuleHeaders);
+
+ for (auto &Header : Headers)
+ Map.addHeader(ActiveModule, std::move(Header), ModuleMap::TextualHeader);
+ return;
+ }
+
+ if (Module *OwningModule = Map.UmbrellaDirs[Dir]) {
+ Diags.Report(UmbrellaLoc, diag::err_mmap_umbrella_clash)
+ << OwningModule->getFullModuleName();
+ HadError = true;
+ return;
+ }
+
+ // Record this umbrella directory.
+ Map.setUmbrellaDir(ActiveModule, Dir, DirName);
+}
+
+/// Parse a module export declaration.
+///
+/// export-declaration:
+/// 'export' wildcard-module-id
+///
+/// wildcard-module-id:
+/// identifier
+/// '*'
+/// identifier '.' wildcard-module-id
+void ModuleMapParser::parseExportDecl() {
+ assert(Tok.is(MMToken::ExportKeyword));
+ SourceLocation ExportLoc = consumeToken();
+
+ // Parse the module-id with an optional wildcard at the end.
+ ModuleId ParsedModuleId;
+ bool Wildcard = false;
+ do {
+ // FIXME: Support string-literal module names here.
+ if (Tok.is(MMToken::Identifier)) {
+ ParsedModuleId.push_back(std::make_pair(Tok.getString(),
+ Tok.getLocation()));
+ consumeToken();
+
+ if (Tok.is(MMToken::Period)) {
+ consumeToken();
+ continue;
+ }
+
+ break;
+ }
+
+ if(Tok.is(MMToken::Star)) {
+ Wildcard = true;
+ consumeToken();
+ break;
+ }
+
+ Diags.Report(Tok.getLocation(), diag::err_mmap_module_id);
+ HadError = true;
+ return;
+ } while (true);
+
+ Module::UnresolvedExportDecl Unresolved = {
+ ExportLoc, ParsedModuleId, Wildcard
+ };
+ ActiveModule->UnresolvedExports.push_back(Unresolved);
+}
+
+/// Parse a module export_as declaration.
+///
+/// export-as-declaration:
+/// 'export_as' identifier
+void ModuleMapParser::parseExportAsDecl() {
+ assert(Tok.is(MMToken::ExportAsKeyword));
+ consumeToken();
+
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_module_id);
+ HadError = true;
+ return;
+ }
+
+ if (ActiveModule->Parent) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_submodule_export_as);
+ consumeToken();
+ return;
+ }
+
+ if (!ActiveModule->ExportAsModule.empty()) {
+ if (ActiveModule->ExportAsModule == Tok.getString()) {
+ Diags.Report(Tok.getLocation(), diag::warn_mmap_redundant_export_as)
+ << ActiveModule->Name << Tok.getString();
+ } else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_conflicting_export_as)
+ << ActiveModule->Name << ActiveModule->ExportAsModule
+ << Tok.getString();
+ }
+ }
+
+ ActiveModule->ExportAsModule = Tok.getString();
+ Map.addLinkAsDependency(ActiveModule);
+
+ consumeToken();
+}
+
+/// Parse a module use declaration.
+///
+/// use-declaration:
+/// 'use' wildcard-module-id
+void ModuleMapParser::parseUseDecl() {
+ assert(Tok.is(MMToken::UseKeyword));
+ auto KWLoc = consumeToken();
+ // Parse the module-id.
+ ModuleId ParsedModuleId;
+ parseModuleId(ParsedModuleId);
+
+ if (ActiveModule->Parent)
+ Diags.Report(KWLoc, diag::err_mmap_use_decl_submodule);
+ else
+ ActiveModule->UnresolvedDirectUses.push_back(ParsedModuleId);
+}
+
+/// Parse a link declaration.
+///
+/// module-declaration:
+/// 'link' 'framework'[opt] string-literal
+void ModuleMapParser::parseLinkDecl() {
+ assert(Tok.is(MMToken::LinkKeyword));
+ SourceLocation LinkLoc = consumeToken();
+
+ // Parse the optional 'framework' keyword.
+ bool IsFramework = false;
+ if (Tok.is(MMToken::FrameworkKeyword)) {
+ consumeToken();
+ IsFramework = true;
+ }
+
+ // Parse the library name
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_library_name)
+ << IsFramework << SourceRange(LinkLoc);
+ HadError = true;
+ return;
+ }
+
+ std::string LibraryName = Tok.getString();
+ consumeToken();
+ ActiveModule->LinkLibraries.push_back(Module::LinkLibrary(LibraryName,
+ IsFramework));
+}
+
+/// Parse a configuration macro declaration.
+///
+/// module-declaration:
+/// 'config_macros' attributes[opt] config-macro-list?
+///
+/// config-macro-list:
+/// identifier (',' identifier)?
+void ModuleMapParser::parseConfigMacros() {
+ assert(Tok.is(MMToken::ConfigMacros));
+ SourceLocation ConfigMacrosLoc = consumeToken();
+
+ // Only top-level modules can have configuration macros.
+ if (ActiveModule->Parent) {
+ Diags.Report(ConfigMacrosLoc, diag::err_mmap_config_macro_submodule);
+ }
+
+ // Parse the optional attributes.
+ Attributes Attrs;
+ if (parseOptionalAttributes(Attrs))
+ return;
+
+ if (Attrs.IsExhaustive && !ActiveModule->Parent) {
+ ActiveModule->ConfigMacrosExhaustive = true;
+ }
+
+ // If we don't have an identifier, we're done.
+ // FIXME: Support macros with the same name as a keyword here.
+ if (!Tok.is(MMToken::Identifier))
+ return;
+
+ // Consume the first identifier.
+ if (!ActiveModule->Parent) {
+ ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+ }
+ consumeToken();
+
+ do {
+ // If there's a comma, consume it.
+ if (!Tok.is(MMToken::Comma))
+ break;
+ consumeToken();
+
+ // We expect to see a macro name here.
+ // FIXME: Support macros with the same name as a keyword here.
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_config_macro);
+ break;
+ }
+
+ // Consume the macro name.
+ if (!ActiveModule->Parent) {
+ ActiveModule->ConfigMacros.push_back(Tok.getString().str());
+ }
+ consumeToken();
+ } while (true);
+}
+
+/// Format a module-id into a string.
+static std::string formatModuleId(const ModuleId &Id) {
+ std::string result;
+ {
+ llvm::raw_string_ostream OS(result);
+
+ for (unsigned I = 0, N = Id.size(); I != N; ++I) {
+ if (I)
+ OS << ".";
+ OS << Id[I].first;
+ }
+ }
+
+ return result;
+}
+
+/// Parse a conflict declaration.
+///
+/// module-declaration:
+/// 'conflict' module-id ',' string-literal
+void ModuleMapParser::parseConflict() {
+ assert(Tok.is(MMToken::Conflict));
+ SourceLocation ConflictLoc = consumeToken();
+ Module::UnresolvedConflict Conflict;
+
+ // Parse the module-id.
+ if (parseModuleId(Conflict.Id))
+ return;
+
+ // Parse the ','.
+ if (!Tok.is(MMToken::Comma)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_comma)
+ << SourceRange(ConflictLoc);
+ return;
+ }
+ consumeToken();
+
+ // Parse the message.
+ if (!Tok.is(MMToken::StringLiteral)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_message)
+ << formatModuleId(Conflict.Id);
+ return;
+ }
+ Conflict.Message = Tok.getString().str();
+ consumeToken();
+
+ // Add this unresolved conflict.
+ ActiveModule->UnresolvedConflicts.push_back(Conflict);
+}
+
+/// Parse an inferred module declaration (wildcard modules).
+///
+/// module-declaration:
+/// 'explicit'[opt] 'framework'[opt] 'module' * attributes[opt]
+/// { inferred-module-member* }
+///
+/// inferred-module-member:
+/// 'export' '*'
+/// 'exclude' identifier
+void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) {
+ assert(Tok.is(MMToken::Star));
+ SourceLocation StarLoc = consumeToken();
+ bool Failed = false;
+
+ // Inferred modules must be submodules.
+ if (!ActiveModule && !Framework) {
+ Diags.Report(StarLoc, diag::err_mmap_top_level_inferred_submodule);
+ Failed = true;
+ }
+
+ if (ActiveModule) {
+ // Inferred modules must have umbrella directories.
+ if (!Failed && ActiveModule->IsAvailable &&
+ !ActiveModule->getUmbrellaDir()) {
+ Diags.Report(StarLoc, diag::err_mmap_inferred_no_umbrella);
+ Failed = true;
+ }
+
+ // Check for redefinition of an inferred module.
+ if (!Failed && ActiveModule->InferSubmodules) {
+ Diags.Report(StarLoc, diag::err_mmap_inferred_redef);
+ if (ActiveModule->InferredSubmoduleLoc.isValid())
+ Diags.Report(ActiveModule->InferredSubmoduleLoc,
+ diag::note_mmap_prev_definition);
+ Failed = true;
+ }
+
+ // Check for the 'framework' keyword, which is not permitted here.
+ if (Framework) {
+ Diags.Report(StarLoc, diag::err_mmap_inferred_framework_submodule);
+ Framework = false;
+ }
+ } else if (Explicit) {
+ Diags.Report(StarLoc, diag::err_mmap_explicit_inferred_framework);
+ Explicit = false;
+ }
+
+ // If there were any problems with this inferred submodule, skip its body.
+ if (Failed) {
+ if (Tok.is(MMToken::LBrace)) {
+ consumeToken();
+ skipUntil(MMToken::RBrace);
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+ }
+ HadError = true;
+ return;
+ }
+
+ // Parse optional attributes.
+ Attributes Attrs;
+ if (parseOptionalAttributes(Attrs))
+ return;
+
+ if (ActiveModule) {
+ // Note that we have an inferred submodule.
+ ActiveModule->InferSubmodules = true;
+ ActiveModule->InferredSubmoduleLoc = StarLoc;
+ ActiveModule->InferExplicitSubmodules = Explicit;
+ } else {
+ // We'll be inferring framework modules for this directory.
+ Map.InferredDirectories[Directory].InferModules = true;
+ Map.InferredDirectories[Directory].Attrs = Attrs;
+ Map.InferredDirectories[Directory].ModuleMapFile = ModuleMapFile;
+ // FIXME: Handle the 'framework' keyword.
+ }
+
+ // Parse the opening brace.
+ if (!Tok.is(MMToken::LBrace)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace_wildcard);
+ HadError = true;
+ return;
+ }
+ SourceLocation LBraceLoc = consumeToken();
+
+ // Parse the body of the inferred submodule.
+ bool Done = false;
+ do {
+ switch (Tok.Kind) {
+ case MMToken::EndOfFile:
+ case MMToken::RBrace:
+ Done = true;
+ break;
+
+ case MMToken::ExcludeKeyword:
+ if (ActiveModule) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
+ << (ActiveModule != nullptr);
+ consumeToken();
+ break;
+ }
+
+ consumeToken();
+ // FIXME: Support string-literal module names here.
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_missing_exclude_name);
+ break;
+ }
+
+ Map.InferredDirectories[Directory].ExcludedModules
+ .push_back(Tok.getString());
+ consumeToken();
+ break;
+
+ case MMToken::ExportKeyword:
+ if (!ActiveModule) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
+ << (ActiveModule != nullptr);
+ consumeToken();
+ break;
+ }
+
+ consumeToken();
+ if (Tok.is(MMToken::Star))
+ ActiveModule->InferExportWildcard = true;
+ else
+ Diags.Report(Tok.getLocation(),
+ diag::err_mmap_expected_export_wildcard);
+ consumeToken();
+ break;
+
+ case MMToken::ExplicitKeyword:
+ case MMToken::ModuleKeyword:
+ case MMToken::HeaderKeyword:
+ case MMToken::PrivateKeyword:
+ case MMToken::UmbrellaKeyword:
+ default:
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
+ << (ActiveModule != nullptr);
+ consumeToken();
+ break;
+ }
+ } while (!Done);
+
+ if (Tok.is(MMToken::RBrace))
+ consumeToken();
+ else {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
+ Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
+ HadError = true;
+ }
+}
+
+/// Parse optional attributes.
+///
+/// attributes:
+/// attribute attributes
+/// attribute
+///
+/// attribute:
+/// [ identifier ]
+///
+/// \param Attrs Will be filled in with the parsed attributes.
+///
+/// \returns true if an error occurred, false otherwise.
+bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
+ bool HadError = false;
+
+ while (Tok.is(MMToken::LSquare)) {
+ // Consume the '['.
+ SourceLocation LSquareLoc = consumeToken();
+
+ // Check whether we have an attribute name here.
+ if (!Tok.is(MMToken::Identifier)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_attribute);
+ skipUntil(MMToken::RSquare);
+ if (Tok.is(MMToken::RSquare))
+ consumeToken();
+ HadError = true;
+ }
+
+ // Decode the attribute name.
+ AttributeKind Attribute
+ = llvm::StringSwitch<AttributeKind>(Tok.getString())
+ .Case("exhaustive", AT_exhaustive)
+ .Case("extern_c", AT_extern_c)
+ .Case("no_undeclared_includes", AT_no_undeclared_includes)
+ .Case("system", AT_system)
+ .Default(AT_unknown);
+ switch (Attribute) {
+ case AT_unknown:
+ Diags.Report(Tok.getLocation(), diag::warn_mmap_unknown_attribute)
+ << Tok.getString();
+ break;
+
+ case AT_system:
+ Attrs.IsSystem = true;
+ break;
+
+ case AT_extern_c:
+ Attrs.IsExternC = true;
+ break;
+
+ case AT_exhaustive:
+ Attrs.IsExhaustive = true;
+ break;
+
+ case AT_no_undeclared_includes:
+ Attrs.NoUndeclaredIncludes = true;
+ break;
+ }
+ consumeToken();
+
+ // Consume the ']'.
+ if (!Tok.is(MMToken::RSquare)) {
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rsquare);
+ Diags.Report(LSquareLoc, diag::note_mmap_lsquare_match);
+ skipUntil(MMToken::RSquare);
+ HadError = true;
+ }
+
+ if (Tok.is(MMToken::RSquare))
+ consumeToken();
+ }
+
+ return HadError;
+}
+
+/// Parse a module map file.
+///
+/// module-map-file:
+/// module-declaration*
+bool ModuleMapParser::parseModuleMapFile() {
+ do {
+ switch (Tok.Kind) {
+ case MMToken::EndOfFile:
+ return HadError;
+
+ case MMToken::ExplicitKeyword:
+ case MMToken::ExternKeyword:
+ case MMToken::ModuleKeyword:
+ case MMToken::FrameworkKeyword:
+ parseModuleDecl();
+ break;
+
+ case MMToken::Comma:
+ case MMToken::ConfigMacros:
+ case MMToken::Conflict:
+ case MMToken::Exclaim:
+ case MMToken::ExcludeKeyword:
+ case MMToken::ExportKeyword:
+ case MMToken::ExportAsKeyword:
+ case MMToken::HeaderKeyword:
+ case MMToken::Identifier:
+ case MMToken::LBrace:
+ case MMToken::LinkKeyword:
+ case MMToken::LSquare:
+ case MMToken::Period:
+ case MMToken::PrivateKeyword:
+ case MMToken::RBrace:
+ case MMToken::RSquare:
+ case MMToken::RequiresKeyword:
+ case MMToken::Star:
+ case MMToken::StringLiteral:
+ case MMToken::IntegerLiteral:
+ case MMToken::TextualKeyword:
+ case MMToken::UmbrellaKeyword:
+ case MMToken::UseKeyword:
+ Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
+ HadError = true;
+ consumeToken();
+ break;
+ }
+ } while (true);
+}
+
+bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem,
+ const DirectoryEntry *Dir, FileID ID,
+ unsigned *Offset,
+ SourceLocation ExternModuleLoc) {
+ assert(Target && "Missing target information");
+ llvm::DenseMap<const FileEntry *, bool>::iterator Known
+ = ParsedModuleMap.find(File);
+ if (Known != ParsedModuleMap.end())
+ return Known->second;
+
+ // If the module map file wasn't already entered, do so now.
+ if (ID.isInvalid()) {
+ auto FileCharacter =
+ IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
+ ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter);
+ }
+
+ assert(Target && "Missing target information");
+ const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(ID);
+ if (!Buffer)
+ return ParsedModuleMap[File] = true;
+ assert((!Offset || *Offset <= Buffer->getBufferSize()) &&
+ "invalid buffer offset");
+
+ // Parse this module map file.
+ Lexer L(SourceMgr.getLocForStartOfFile(ID), MMapLangOpts,
+ Buffer->getBufferStart(),
+ Buffer->getBufferStart() + (Offset ? *Offset : 0),
+ Buffer->getBufferEnd());
+ SourceLocation Start = L.getSourceLocation();
+ ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir,
+ IsSystem);
+ bool Result = Parser.parseModuleMapFile();
+ ParsedModuleMap[File] = Result;
+
+ if (Offset) {
+ auto Loc = SourceMgr.getDecomposedLoc(Parser.getLocation());
+ assert(Loc.first == ID && "stopped in a different file?");
+ *Offset = Loc.second;
+ }
+
+ // Notify callbacks that we parsed it.
+ for (const auto &Cb : Callbacks)
+ Cb->moduleMapFileRead(Start, *File, IsSystem);
+
+ return Result;
+}
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
new file mode 100644
index 000000000000..31548d246d5a
--- /dev/null
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -0,0 +1,163 @@
+//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// caching of lexed tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+// EnableBacktrackAtThisPos - From the point that this method is called, and
+// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
+// keeps track of the lexed tokens so that a subsequent Backtrack() call will
+// make the Preprocessor re-lex the same tokens.
+//
+// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
+// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
+// be combined with the EnableBacktrackAtThisPos calls in reverse order.
+void Preprocessor::EnableBacktrackAtThisPos() {
+ assert(LexLevel == 0 && "cannot use lookahead while lexing");
+ BacktrackPositions.push_back(CachedLexPos);
+ EnterCachingLexMode();
+}
+
+// Disable the last EnableBacktrackAtThisPos call.
+void Preprocessor::CommitBacktrackedTokens() {
+ assert(!BacktrackPositions.empty()
+ && "EnableBacktrackAtThisPos was not called!");
+ BacktrackPositions.pop_back();
+}
+
+// Make Preprocessor re-lex the tokens that were lexed since
+// EnableBacktrackAtThisPos() was previously called.
+void Preprocessor::Backtrack() {
+ assert(!BacktrackPositions.empty()
+ && "EnableBacktrackAtThisPos was not called!");
+ CachedLexPos = BacktrackPositions.back();
+ BacktrackPositions.pop_back();
+ recomputeCurLexerKind();
+}
+
+void Preprocessor::CachingLex(Token &Result) {
+ if (!InCachingLexMode())
+ return;
+
+ // The assert in EnterCachingLexMode should prevent this from happening.
+ assert(LexLevel == 1 &&
+ "should not use token caching within the preprocessor");
+
+ if (CachedLexPos < CachedTokens.size()) {
+ Result = CachedTokens[CachedLexPos++];
+ Result.setFlag(Token::IsReinjected);
+ return;
+ }
+
+ ExitCachingLexMode();
+ Lex(Result);
+
+ if (isBacktrackEnabled()) {
+ // Cache the lexed token.
+ EnterCachingLexModeUnchecked();
+ CachedTokens.push_back(Result);
+ ++CachedLexPos;
+ return;
+ }
+
+ if (CachedLexPos < CachedTokens.size()) {
+ EnterCachingLexModeUnchecked();
+ } else {
+ // All cached tokens were consumed.
+ CachedTokens.clear();
+ CachedLexPos = 0;
+ }
+}
+
+void Preprocessor::EnterCachingLexMode() {
+ // The caching layer sits on top of all the other lexers, so it's incorrect
+ // to cache tokens while inside a nested lex action. The cached tokens would
+ // be retained after returning to the enclosing lex action and, at best,
+ // would appear at the wrong position in the token stream.
+ assert(LexLevel == 0 &&
+ "entered caching lex mode while lexing something else");
+
+ if (InCachingLexMode()) {
+ assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
+ return;
+ }
+
+ EnterCachingLexModeUnchecked();
+}
+
+void Preprocessor::EnterCachingLexModeUnchecked() {
+ assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
+ PushIncludeMacroStack();
+ CurLexerKind = CLK_CachingLexer;
+}
+
+
+const Token &Preprocessor::PeekAhead(unsigned N) {
+ assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
+ ExitCachingLexMode();
+ for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
+ CachedTokens.push_back(Token());
+ Lex(CachedTokens.back());
+ }
+ EnterCachingLexMode();
+ return CachedTokens.back();
+}
+
+void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
+ assert(Tok.isAnnotation() && "Expected annotation token");
+ assert(CachedLexPos != 0 && "Expected to have some cached tokens");
+ assert(CachedTokens[CachedLexPos-1].getLastLoc() == Tok.getAnnotationEndLoc()
+ && "The annotation should be until the most recent cached token");
+
+ // Start from the end of the cached tokens list and look for the token
+ // that is the beginning of the annotation token.
+ for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
+ CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
+ if (AnnotBegin->getLocation() == Tok.getLocation()) {
+ assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) &&
+ "The backtrack pos points inside the annotated tokens!");
+ // Replace the cached tokens with the single annotation token.
+ if (i < CachedLexPos)
+ CachedTokens.erase(AnnotBegin + 1, CachedTokens.begin() + CachedLexPos);
+ *AnnotBegin = Tok;
+ CachedLexPos = i;
+ return;
+ }
+ }
+}
+
+bool Preprocessor::IsPreviousCachedToken(const Token &Tok) const {
+ // There's currently no cached token...
+ if (!CachedLexPos)
+ return false;
+
+ const Token LastCachedTok = CachedTokens[CachedLexPos - 1];
+ if (LastCachedTok.getKind() != Tok.getKind())
+ return false;
+
+ int RelOffset = 0;
+ if ((!getSourceManager().isInSameSLocAddrSpace(
+ Tok.getLocation(), getLastCachedTokenLocation(), &RelOffset)) ||
+ RelOffset)
+ return false;
+
+ return true;
+}
+
+void Preprocessor::ReplacePreviousCachedToken(ArrayRef<Token> NewToks) {
+ assert(CachedLexPos != 0 && "Expected to have some cached tokens");
+ CachedTokens.insert(CachedTokens.begin() + CachedLexPos - 1, NewToks.begin(),
+ NewToks.end());
+ CachedTokens.erase(CachedTokens.begin() + CachedLexPos - 1 + NewToks.size());
+ CachedLexPos += NewToks.size() - 1;
+}
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
new file mode 100644
index 000000000000..cd8b04b20d24
--- /dev/null
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -0,0 +1,13 @@
+//===--- PPCallbacks.cpp - Callbacks for Preprocessor actions ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PPCallbacks.h"
+
+using namespace clang;
+
+void PPChainedCallbacks::anchor() { }
diff --git a/clang/lib/Lex/PPConditionalDirectiveRecord.cpp b/clang/lib/Lex/PPConditionalDirectiveRecord.cpp
new file mode 100644
index 000000000000..facee28007c7
--- /dev/null
+++ b/clang/lib/Lex/PPConditionalDirectiveRecord.cpp
@@ -0,0 +1,119 @@
+//===--- PPConditionalDirectiveRecord.h - Preprocessing Directives-*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPConditionalDirectiveRecord class, which maintains
+// a record of conditional directive regions.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Lex/PPConditionalDirectiveRecord.h"
+#include "llvm/Support/Capacity.h"
+
+using namespace clang;
+
+PPConditionalDirectiveRecord::PPConditionalDirectiveRecord(SourceManager &SM)
+ : SourceMgr(SM) {
+ CondDirectiveStack.push_back(SourceLocation());
+}
+
+bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective(
+ SourceRange Range) const {
+ if (Range.isInvalid())
+ return false;
+
+ CondDirectiveLocsTy::const_iterator low = llvm::lower_bound(
+ CondDirectiveLocs, Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
+ if (low == CondDirectiveLocs.end())
+ return false;
+
+ if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
+ return false;
+
+ CondDirectiveLocsTy::const_iterator
+ upp = std::upper_bound(low, CondDirectiveLocs.end(),
+ Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
+ SourceLocation uppRegion;
+ if (upp != CondDirectiveLocs.end())
+ uppRegion = upp->getRegionLoc();
+
+ return low->getRegionLoc() != uppRegion;
+}
+
+SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc(
+ SourceLocation Loc) const {
+ if (Loc.isInvalid())
+ return SourceLocation();
+ if (CondDirectiveLocs.empty())
+ return SourceLocation();
+
+ if (SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+ Loc))
+ return CondDirectiveStack.back();
+
+ CondDirectiveLocsTy::const_iterator low = llvm::lower_bound(
+ CondDirectiveLocs, Loc, CondDirectiveLoc::Comp(SourceMgr));
+ assert(low != CondDirectiveLocs.end());
+ return low->getRegionLoc();
+}
+
+void PPConditionalDirectiveRecord::addCondDirectiveLoc(
+ CondDirectiveLoc DirLoc) {
+ // Ignore directives in system headers.
+ if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
+ return;
+
+ assert(CondDirectiveLocs.empty() ||
+ SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
+ DirLoc.getLoc()));
+ CondDirectiveLocs.push_back(DirLoc);
+}
+
+void PPConditionalDirectiveRecord::If(SourceLocation Loc,
+ SourceRange ConditionRange,
+ ConditionValueKind ConditionValue) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc,
+ const Token &MacroNameTok,
+ const MacroDefinition &MD) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc,
+ const Token &MacroNameTok,
+ const MacroDefinition &MD) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.push_back(Loc);
+}
+
+void PPConditionalDirectiveRecord::Elif(SourceLocation Loc,
+ SourceRange ConditionRange,
+ ConditionValueKind ConditionValue,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Else(SourceLocation Loc,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ CondDirectiveStack.back() = Loc;
+}
+
+void PPConditionalDirectiveRecord::Endif(SourceLocation Loc,
+ SourceLocation IfLoc) {
+ addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
+ assert(!CondDirectiveStack.empty());
+ CondDirectiveStack.pop_back();
+}
+
+size_t PPConditionalDirectiveRecord::getTotalMemory() const {
+ return llvm::capacity_in_bytes(CondDirectiveLocs);
+}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
new file mode 100644
index 000000000000..3b7eaee3c914
--- /dev/null
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -0,0 +1,3084 @@
+//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implements # directive processing for the Preprocessor.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/ModuleMap.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/VariadicMacroSupport.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <new>
+#include <string>
+#include <utility>
+
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Utility Methods for Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
+ auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead};
+ MIChainHead = MIChain;
+ return &MIChain->MI;
+}
+
+DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
+ SourceLocation Loc) {
+ return new (BP) DefMacroDirective(MI, Loc);
+}
+
+UndefMacroDirective *
+Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
+ return new (BP) UndefMacroDirective(UndefLoc);
+}
+
+VisibilityMacroDirective *
+Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
+ bool isPublic) {
+ return new (BP) VisibilityMacroDirective(Loc, isPublic);
+}
+
+/// Read and discard all tokens remaining on the current line until
+/// the tok::eod token is found.
+SourceRange Preprocessor::DiscardUntilEndOfDirective() {
+ Token Tmp;
+ SourceRange Res;
+
+ LexUnexpandedToken(Tmp);
+ Res.setBegin(Tmp.getLocation());
+ while (Tmp.isNot(tok::eod)) {
+ assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
+ LexUnexpandedToken(Tmp);
+ }
+ Res.setEnd(Tmp.getLocation());
+ return Res;
+}
+
+/// Enumerates possible cases of #define/#undef a reserved identifier.
+enum MacroDiag {
+ MD_NoWarn, //> Not a reserved identifier
+ MD_KeywordDef, //> Macro hides keyword, enabled by default
+ MD_ReservedMacro //> #define of #undef reserved id, disabled by default
+};
+
+/// Checks if the specified identifier is reserved in the specified
+/// language.
+/// This function does not check if the identifier is a keyword.
+static bool isReservedId(StringRef Text, const LangOptions &Lang) {
+ // C++ [macro.names], C11 7.1.3:
+ // All identifiers that begin with an underscore and either an uppercase
+ // letter or another underscore are always reserved for any use.
+ if (Text.size() >= 2 && Text[0] == '_' &&
+ (isUppercase(Text[1]) || Text[1] == '_'))
+ return true;
+ // C++ [global.names]
+ // Each name that contains a double underscore ... is reserved to the
+ // implementation for any use.
+ if (Lang.CPlusPlus) {
+ if (Text.find("__") != StringRef::npos)
+ return true;
+ }
+ return false;
+}
+
+// The -fmodule-name option tells the compiler to textually include headers in
+// the specified module, meaning clang won't build the specified module. This is
+// useful in a number of situations, for instance, when building a library that
+// vends a module map, one might want to avoid hitting intermediate build
+// products containimg the the module map or avoid finding the system installed
+// modulemap for that library.
+static bool isForModuleBuilding(Module *M, StringRef CurrentModule,
+ StringRef ModuleName) {
+ StringRef TopLevelName = M->getTopLevelModuleName();
+
+ // When building framework Foo, we wanna make sure that Foo *and* Foo_Private
+ // are textually included and no modules are built for both.
+ if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName &&
+ !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))
+ TopLevelName = TopLevelName.drop_back(8);
+
+ return TopLevelName == CurrentModule;
+}
+
+static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
+ const LangOptions &Lang = PP.getLangOpts();
+ StringRef Text = II->getName();
+ if (isReservedId(Text, Lang))
+ return MD_ReservedMacro;
+ if (II->isKeyword(Lang))
+ return MD_KeywordDef;
+ if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final")))
+ return MD_KeywordDef;
+ return MD_NoWarn;
+}
+
+static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
+ const LangOptions &Lang = PP.getLangOpts();
+ StringRef Text = II->getName();
+ // Do not warn on keyword undef. It is generally harmless and widely used.
+ if (isReservedId(Text, Lang))
+ return MD_ReservedMacro;
+ return MD_NoWarn;
+}
+
+// Return true if we want to issue a diagnostic by default if we
+// encounter this name in a #include with the wrong case. For now,
+// this includes the standard C and C++ headers, Posix headers,
+// and Boost headers. Improper case for these #includes is a
+// potential portability issue.
+static bool warnByDefaultOnWrongCase(StringRef Include) {
+ // If the first component of the path is "boost", treat this like a standard header
+ // for the purposes of diagnostics.
+ if (::llvm::sys::path::begin(Include)->equals_lower("boost"))
+ return true;
+
+ // "condition_variable" is the longest standard header name at 18 characters.
+ // If the include file name is longer than that, it can't be a standard header.
+ static const size_t MaxStdHeaderNameLen = 18u;
+ if (Include.size() > MaxStdHeaderNameLen)
+ return false;
+
+ // Lowercase and normalize the search string.
+ SmallString<32> LowerInclude{Include};
+ for (char &Ch : LowerInclude) {
+ // In the ASCII range?
+ if (static_cast<unsigned char>(Ch) > 0x7f)
+ return false; // Can't be a standard header
+ // ASCII lowercase:
+ if (Ch >= 'A' && Ch <= 'Z')
+ Ch += 'a' - 'A';
+ // Normalize path separators for comparison purposes.
+ else if (::llvm::sys::path::is_separator(Ch))
+ Ch = '/';
+ }
+
+ // The standard C/C++ and Posix headers
+ return llvm::StringSwitch<bool>(LowerInclude)
+ // C library headers
+ .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
+ .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
+ .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
+ .Cases("stdatomic.h", "stdbool.h", "stddef.h", "stdint.h", "stdio.h", true)
+ .Cases("stdlib.h", "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", true)
+ .Cases("time.h", "uchar.h", "wchar.h", "wctype.h", true)
+
+ // C++ headers for C library facilities
+ .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
+ .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
+ .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
+ .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
+ .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
+ .Case("cwctype", true)
+
+ // C++ library headers
+ .Cases("algorithm", "fstream", "list", "regex", "thread", true)
+ .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
+ .Cases("atomic", "future", "map", "set", "type_traits", true)
+ .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
+ .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
+ .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
+ .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
+ .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
+ .Cases("deque", "istream", "queue", "string", "valarray", true)
+ .Cases("exception", "iterator", "random", "strstream", "vector", true)
+ .Cases("forward_list", "limits", "ratio", "system_error", true)
+
+ // POSIX headers (which aren't also C headers)
+ .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
+ .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
+ .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
+ .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
+ .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
+ .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
+ .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
+ .Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
+ .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
+ .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
+ .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
+ .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
+ .Default(false);
+}
+
+bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
+ bool *ShadowFlag) {
+ // Missing macro name?
+ if (MacroNameTok.is(tok::eod))
+ return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
+
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+ if (!II)
+ return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
+
+ if (II->isCPlusPlusOperatorKeyword()) {
+ // C++ 2.5p2: Alternative tokens behave the same as its primary token
+ // except for their spellings.
+ Diag(MacroNameTok, getLangOpts().MicrosoftExt
+ ? diag::ext_pp_operator_used_as_macro_name
+ : diag::err_pp_operator_used_as_macro_name)
+ << II << MacroNameTok.getKind();
+ // Allow #defining |and| and friends for Microsoft compatibility or
+ // recovery when legacy C headers are included in C++.
+ }
+
+ if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
+ // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
+ return Diag(MacroNameTok, diag::err_defined_macro_name);
+ }
+
+ if (isDefineUndef == MU_Undef) {
+ auto *MI = getMacroInfo(II);
+ if (MI && MI->isBuiltinMacro()) {
+ // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
+ // and C++ [cpp.predefined]p4], but allow it as an extension.
+ Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
+ }
+ }
+
+ // If defining/undefining reserved identifier or a keyword, we need to issue
+ // a warning.
+ SourceLocation MacroNameLoc = MacroNameTok.getLocation();
+ if (ShadowFlag)
+ *ShadowFlag = false;
+ if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
+ (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
+ MacroDiag D = MD_NoWarn;
+ if (isDefineUndef == MU_Define) {
+ D = shouldWarnOnMacroDef(*this, II);
+ }
+ else if (isDefineUndef == MU_Undef)
+ D = shouldWarnOnMacroUndef(*this, II);
+ if (D == MD_KeywordDef) {
+ // We do not want to warn on some patterns widely used in configuration
+ // scripts. This requires analyzing next tokens, so do not issue warnings
+ // now, only inform caller.
+ if (ShadowFlag)
+ *ShadowFlag = true;
+ }
+ if (D == MD_ReservedMacro)
+ Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
+ }
+
+ // Okay, we got a good identifier.
+ return false;
+}
+
+/// Lex and validate a macro name, which occurs after a
+/// \#define or \#undef.
+///
+/// This sets the token kind to eod and discards the rest of the macro line if
+/// the macro name is invalid.
+///
+/// \param MacroNameTok Token that is expected to be a macro name.
+/// \param isDefineUndef Context in which macro is used.
+/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
+void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
+ bool *ShadowFlag) {
+ // Read the token, don't allow macro expansion on it.
+ LexUnexpandedToken(MacroNameTok);
+
+ if (MacroNameTok.is(tok::code_completion)) {
+ if (CodeComplete)
+ CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
+ setCodeCompletionReached();
+ LexUnexpandedToken(MacroNameTok);
+ }
+
+ if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
+ return;
+
+ // Invalid macro name, read and discard the rest of the line and set the
+ // token kind to tok::eod if necessary.
+ if (MacroNameTok.isNot(tok::eod)) {
+ MacroNameTok.setKind(tok::eod);
+ DiscardUntilEndOfDirective();
+ }
+}
+
+/// Ensure that the next token is a tok::eod token.
+///
+/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
+/// true, then we consider macros that expand to zero tokens as being ok.
+///
+/// Returns the location of the end of the directive.
+SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
+ bool EnableMacros) {
+ Token Tmp;
+ // Lex unexpanded tokens for most directives: macros might expand to zero
+ // tokens, causing us to miss diagnosing invalid lines. Some directives (like
+ // #line) allow empty macros.
+ if (EnableMacros)
+ Lex(Tmp);
+ else
+ LexUnexpandedToken(Tmp);
+
+ // There should be no tokens after the directive, but we allow them as an
+ // extension.
+ while (Tmp.is(tok::comment)) // Skip comments in -C mode.
+ LexUnexpandedToken(Tmp);
+
+ if (Tmp.is(tok::eod))
+ return Tmp.getLocation();
+
+ // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
+ // or if this is a macro-style preprocessing directive, because it is more
+ // trouble than it is worth to insert /**/ and check that there is no /**/
+ // in the range also.
+ FixItHint Hint;
+ if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
+ !CurTokenLexer)
+ Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
+ Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
+ return DiscardUntilEndOfDirective().getEnd();
+}
+
+Optional<unsigned> Preprocessor::getSkippedRangeForExcludedConditionalBlock(
+ SourceLocation HashLoc) {
+ if (!ExcludedConditionalDirectiveSkipMappings)
+ return None;
+ if (!HashLoc.isFileID())
+ return None;
+
+ std::pair<FileID, unsigned> HashFileOffset =
+ SourceMgr.getDecomposedLoc(HashLoc);
+ const llvm::MemoryBuffer *Buf = SourceMgr.getBuffer(HashFileOffset.first);
+ auto It = ExcludedConditionalDirectiveSkipMappings->find(Buf);
+ if (It == ExcludedConditionalDirectiveSkipMappings->end())
+ return None;
+
+ const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond();
+ // Check if the offset of '#' is mapped in the skipped ranges.
+ auto MappingIt = SkippedRanges.find(HashFileOffset.second);
+ if (MappingIt == SkippedRanges.end())
+ return None;
+
+ unsigned BytesToSkip = MappingIt->getSecond();
+ unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset();
+ assert(CurLexerBufferOffset >= HashFileOffset.second &&
+ "lexer is before the hash?");
+ // Take into account the fact that the lexer has already advanced, so the
+ // number of bytes to skip must be adjusted.
+ unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second;
+ assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?");
+ return BytesToSkip - LengthDiff;
+}
+
+/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
+/// decided that the subsequent tokens are in the \#if'd out portion of the
+/// file. Lex the rest of the file, until we see an \#endif. If
+/// FoundNonSkipPortion is true, then we have already emitted code for part of
+/// this \#if directive, so \#else/\#elif blocks should never be entered.
+/// If ElseOk is true, then \#else directives are ok, if not, then we have
+/// already seen one so a \#else directive is a duplicate. When this returns,
+/// the caller can lex the first valid token.
+void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
+ SourceLocation IfTokenLoc,
+ bool FoundNonSkipPortion,
+ bool FoundElse,
+ SourceLocation ElseLoc) {
+ ++NumSkipped;
+ assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");
+
+ if (PreambleConditionalStack.reachedEOFWhileSkipping())
+ PreambleConditionalStack.clearSkipInfo();
+ else
+ CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
+ FoundNonSkipPortion, FoundElse);
+
+ // Enter raw mode to disable identifier lookup (and thus macro expansion),
+ // disabling warnings, etc.
+ CurPPLexer->LexingRawMode = true;
+ Token Tok;
+ if (auto SkipLength =
+ getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) {
+ // Skip to the next '#endif' / '#else' / '#elif'.
+ CurLexer->skipOver(*SkipLength);
+ }
+ while (true) {
+ CurLexer->Lex(Tok);
+
+ if (Tok.is(tok::code_completion)) {
+ if (CodeComplete)
+ CodeComplete->CodeCompleteInConditionalExclusion();
+ setCodeCompletionReached();
+ continue;
+ }
+
+ // If this is the end of the buffer, we have an error.
+ if (Tok.is(tok::eof)) {
+ // We don't emit errors for unterminated conditionals here,
+ // Lexer::LexEndOfFile can do that properly.
+ // Just return and let the caller lex after this #include.
+ if (PreambleConditionalStack.isRecording())
+ PreambleConditionalStack.SkipInfo.emplace(
+ HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
+ break;
+ }
+
+ // If this token is not a preprocessor directive, just skip it.
+ if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+ continue;
+
+ // We just parsed a # character at the start of a line, so we're in
+ // directive mode. Tell the lexer this so any newlines we see will be
+ // converted into an EOD token (this terminates the macro).
+ CurPPLexer->ParsingPreprocessorDirective = true;
+ if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
+
+
+ // Read the next token, the directive flavor.
+ LexUnexpandedToken(Tok);
+
+ // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
+ // something bogus), skip it.
+ if (Tok.isNot(tok::raw_identifier)) {
+ CurPPLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
+ continue;
+ }
+
+ // If the first letter isn't i or e, it isn't intesting to us. We know that
+ // this is safe in the face of spelling differences, because there is no way
+ // to spell an i/e in a strange way that is another letter. Skipping this
+ // allows us to avoid looking up the identifier info for #define/#undef and
+ // other common directives.
+ StringRef RI = Tok.getRawIdentifier();
+
+ char FirstChar = RI[0];
+ if (FirstChar >= 'a' && FirstChar <= 'z' &&
+ FirstChar != 'i' && FirstChar != 'e') {
+ CurPPLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
+ continue;
+ }
+
+ // Get the identifier name without trigraphs or embedded newlines. Note
+ // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
+ // when skipping.
+ char DirectiveBuf[20];
+ StringRef Directive;
+ if (!Tok.needsCleaning() && RI.size() < 20) {
+ Directive = RI;
+ } else {
+ std::string DirectiveStr = getSpelling(Tok);
+ size_t IdLen = DirectiveStr.size();
+ if (IdLen >= 20) {
+ CurPPLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
+ continue;
+ }
+ memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
+ Directive = StringRef(DirectiveBuf, IdLen);
+ }
+
+ if (Directive.startswith("if")) {
+ StringRef Sub = Directive.substr(2);
+ if (Sub.empty() || // "if"
+ Sub == "def" || // "ifdef"
+ Sub == "ndef") { // "ifndef"
+ // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
+ // bother parsing the condition.
+ DiscardUntilEndOfDirective();
+ CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
+ /*foundnonskip*/false,
+ /*foundelse*/false);
+ }
+ } else if (Directive[0] == 'e') {
+ StringRef Sub = Directive.substr(1);
+ if (Sub == "ndif") { // "endif"
+ PPConditionalInfo CondInfo;
+ CondInfo.WasSkipping = true; // Silence bogus warning.
+ bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
+ (void)InCond; // Silence warning in no-asserts mode.
+ assert(!InCond && "Can't be skipping if not in a conditional!");
+
+ // If we popped the outermost skipping block, we're done skipping!
+ if (!CondInfo.WasSkipping) {
+ // Restore the value of LexingRawMode so that trailing comments
+ // are handled correctly, if we've reached the outermost block.
+ CurPPLexer->LexingRawMode = false;
+ CheckEndOfDirective("endif");
+ CurPPLexer->LexingRawMode = true;
+ if (Callbacks)
+ Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
+ break;
+ } else {
+ DiscardUntilEndOfDirective();
+ }
+ } else if (Sub == "lse") { // "else".
+ // #else directive in a skipping conditional. If not in some other
+ // skipping conditional, and if #else hasn't already been seen, enter it
+ // as a non-skipping conditional.
+ PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
+
+ // If this is a #else with a #else before it, report the error.
+ if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else);
+
+ // Note that we've seen a #else in this conditional.
+ CondInfo.FoundElse = true;
+
+ // If the conditional is at the top level, and the #if block wasn't
+ // entered, enter the #else block now.
+ if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
+ CondInfo.FoundNonSkip = true;
+ // Restore the value of LexingRawMode so that trailing comments
+ // are handled correctly.
+ CurPPLexer->LexingRawMode = false;
+ CheckEndOfDirective("else");
+ CurPPLexer->LexingRawMode = true;
+ if (Callbacks)
+ Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
+ break;
+ } else {
+ DiscardUntilEndOfDirective(); // C99 6.10p4.
+ }
+ } else if (Sub == "lif") { // "elif".
+ PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
+
+ // If this is a #elif with a #else before it, report the error.
+ if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);
+
+ // If this is in a skipping block or if we're already handled this #if
+ // block, don't bother parsing the condition.
+ if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+ DiscardUntilEndOfDirective();
+ } else {
+ // Restore the value of LexingRawMode so that identifiers are
+ // looked up, etc, inside the #elif expression.
+ assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
+ CurPPLexer->LexingRawMode = false;
+ IdentifierInfo *IfNDefMacro = nullptr;
+ DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
+ const bool CondValue = DER.Conditional;
+ CurPPLexer->LexingRawMode = true;
+ if (Callbacks) {
+ Callbacks->Elif(
+ Tok.getLocation(), DER.ExprRange,
+ (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
+ CondInfo.IfLoc);
+ }
+ // If this condition is true, enter it!
+ if (CondValue) {
+ CondInfo.FoundNonSkip = true;
+ break;
+ }
+ }
+ }
+ }
+
+ CurPPLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
+ }
+
+ // Finally, if we are out of the conditional (saw an #endif or ran off the end
+ // of the file, just stop skipping and return to lexing whatever came after
+ // the #if block.
+ CurPPLexer->LexingRawMode = false;
+
+ // The last skipped range isn't actually skipped yet if it's truncated
+ // by the end of the preamble; we'll resume parsing after the preamble.
+ if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
+ Callbacks->SourceRangeSkipped(
+ SourceRange(HashTokenLoc, CurPPLexer->getSourceLocation()),
+ Tok.getLocation());
+}
+
+Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
+ if (!SourceMgr.isInMainFile(Loc)) {
+ // Try to determine the module of the include directive.
+ // FIXME: Look into directly passing the FileEntry from LookupFile instead.
+ FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
+ if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
+ // The include comes from an included file.
+ return HeaderInfo.getModuleMap()
+ .findModuleForHeader(EntryOfIncl)
+ .getModule();
+ }
+ }
+
+ // This is either in the main file or not in a file at all. It belongs
+ // to the current module, if there is one.
+ return getLangOpts().CurrentModule.empty()
+ ? nullptr
+ : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
+}
+
+const FileEntry *
+Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
+ Module *M,
+ SourceLocation Loc) {
+ assert(M && "no module to include");
+
+ // If the context is the global module fragment of some module, we never
+ // want to return that file; instead, we want the innermost include-guarded
+ // header that it included.
+ bool InGlobalModuleFragment = M->Kind == Module::GlobalModuleFragment;
+
+ // If we have a module import syntax, we shouldn't include a header to
+ // make a particular module visible.
+ if ((getLangOpts().ObjC || getLangOpts().CPlusPlusModules ||
+ getLangOpts().ModulesTS) &&
+ !InGlobalModuleFragment)
+ return nullptr;
+
+ Module *TopM = M->getTopLevelModule();
+ Module *IncM = getModuleForLocation(IncLoc);
+
+ // Walk up through the include stack, looking through textual headers of M
+ // until we hit a non-textual header that we can #include. (We assume textual
+ // headers of a module with non-textual headers aren't meant to be used to
+ // import entities from the module.)
+ auto &SM = getSourceManager();
+ while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
+ auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
+ auto *FE = SM.getFileEntryForID(ID);
+ if (!FE)
+ break;
+
+ if (InGlobalModuleFragment) {
+ if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
+ return FE;
+ Loc = SM.getIncludeLoc(ID);
+ continue;
+ }
+
+ bool InTextualHeader = false;
+ for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) {
+ if (!Header.getModule()->isSubModuleOf(TopM))
+ continue;
+
+ if (!(Header.getRole() & ModuleMap::TextualHeader)) {
+ // If this is an accessible, non-textual header of M's top-level module
+ // that transitively includes the given location and makes the
+ // corresponding module visible, this is the thing to #include.
+ if (Header.isAccessibleFrom(IncM))
+ return FE;
+
+ // It's in a private header; we can't #include it.
+ // FIXME: If there's a public header in some module that re-exports it,
+ // then we could suggest including that, but it's not clear that's the
+ // expected way to make this entity visible.
+ continue;
+ }
+
+ InTextualHeader = true;
+ }
+
+ if (!InTextualHeader)
+ break;
+
+ Loc = SM.getIncludeLoc(ID);
+ }
+
+ return nullptr;
+}
+
+Optional<FileEntryRef> Preprocessor::LookupFile(
+ SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
+ const DirectoryLookup *FromDir, const FileEntry *FromFile,
+ const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
+ SmallVectorImpl<char> *RelativePath,
+ ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
+ bool *IsFrameworkFound, bool SkipCache) {
+ Module *RequestingModule = getModuleForLocation(FilenameLoc);
+ bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);
+
+ // If the header lookup mechanism may be relative to the current inclusion
+ // stack, record the parent #includes.
+ SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
+ Includers;
+ bool BuildSystemModule = false;
+ if (!FromDir && !FromFile) {
+ FileID FID = getCurrentFileLexer()->getFileID();
+ const FileEntry *FileEnt = SourceMgr.getFileEntryForID(FID);
+
+ // If there is no file entry associated with this file, it must be the
+ // predefines buffer or the module includes buffer. Any other file is not
+ // lexed with a normal lexer, so it won't be scanned for preprocessor
+ // directives.
+ //
+ // If we have the predefines buffer, resolve #include references (which come
+ // from the -include command line argument) from the current working
+ // directory instead of relative to the main file.
+ //
+ // If we have the module includes buffer, resolve #include references (which
+ // come from header declarations in the module map) relative to the module
+ // map file.
+ if (!FileEnt) {
+ if (FID == SourceMgr.getMainFileID() && MainFileDir) {
+ Includers.push_back(std::make_pair(nullptr, MainFileDir));
+ BuildSystemModule = getCurrentModule()->IsSystem;
+ } else if ((FileEnt =
+ SourceMgr.getFileEntryForID(SourceMgr.getMainFileID())))
+ Includers.push_back(std::make_pair(FileEnt, *FileMgr.getDirectory(".")));
+ } else {
+ Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
+ }
+
+ // MSVC searches the current include stack from top to bottom for
+ // headers included by quoted include directives.
+ // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
+ if (LangOpts.MSVCCompat && !isAngled) {
+ for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
+ if (IsFileLexer(ISEntry))
+ if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
+ Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
+ }
+ }
+ }
+
+ CurDir = CurDirLookup;
+
+ if (FromFile) {
+ // We're supposed to start looking from after a particular file. Search
+ // the include path until we find that file or run out of files.
+ const DirectoryLookup *TmpCurDir = CurDir;
+ const DirectoryLookup *TmpFromDir = nullptr;
+ while (Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
+ Filename, FilenameLoc, isAngled, TmpFromDir, TmpCurDir,
+ Includers, SearchPath, RelativePath, RequestingModule,
+ SuggestedModule, /*IsMapped=*/nullptr,
+ /*IsFrameworkFound=*/nullptr, SkipCache)) {
+ // Keep looking as if this file did a #include_next.
+ TmpFromDir = TmpCurDir;
+ ++TmpFromDir;
+ if (&FE->getFileEntry() == FromFile) {
+ // Found it.
+ FromDir = TmpFromDir;
+ CurDir = TmpCurDir;
+ break;
+ }
+ }
+ }
+
+ // Do a standard file entry lookup.
+ Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
+ Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath,
+ RelativePath, RequestingModule, SuggestedModule, IsMapped,
+ IsFrameworkFound, SkipCache, BuildSystemModule);
+ if (FE) {
+ if (SuggestedModule && !LangOpts.AsmPreprocessor)
+ HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
+ RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
+ Filename, &FE->getFileEntry());
+ return FE;
+ }
+
+ const FileEntry *CurFileEnt;
+ // Otherwise, see if this is a subframework header. If so, this is relative
+ // to one of the headers on the #include stack. Walk the list of the current
+ // headers on the #include stack and pass them to HeaderInfo.
+ if (IsFileLexer()) {
+ if ((CurFileEnt = CurPPLexer->getFileEntry())) {
+ if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
+ Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule,
+ SuggestedModule)) {
+ if (SuggestedModule && !LangOpts.AsmPreprocessor)
+ HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
+ RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
+ Filename, &FE->getFileEntry());
+ return FE;
+ }
+ }
+ }
+
+ for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
+ if (IsFileLexer(ISEntry)) {
+ if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
+ if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
+ Filename, CurFileEnt, SearchPath, RelativePath,
+ RequestingModule, SuggestedModule)) {
+ if (SuggestedModule && !LangOpts.AsmPreprocessor)
+ HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
+ RequestingModule, RequestingModuleIsModuleInterface,
+ FilenameLoc, Filename, &FE->getFileEntry());
+ return FE;
+ }
+ }
+ }
+ }
+
+ // Otherwise, we really couldn't find the file.
+ return None;
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+class Preprocessor::ResetMacroExpansionHelper {
+public:
+ ResetMacroExpansionHelper(Preprocessor *pp)
+ : PP(pp), save(pp->DisableMacroExpansion) {
+ if (pp->MacroExpansionInDirectivesOverride)
+ pp->DisableMacroExpansion = false;
+ }
+
+ ~ResetMacroExpansionHelper() {
+ PP->DisableMacroExpansion = save;
+ }
+
+private:
+ Preprocessor *PP;
+ bool save;
+};
+
+/// Process a directive while looking for the through header or a #pragma
+/// hdrstop. The following directives are handled:
+/// #include (to check if it is the through header)
+/// #define (to warn about macros that don't match the PCH)
+/// #pragma (to check for pragma hdrstop).
+/// All other directives are completely discarded.
+void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
+ SourceLocation HashLoc) {
+ if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
+ if (II->getPPKeywordID() == tok::pp_define) {
+ return HandleDefineDirective(Result,
+ /*ImmediatelyAfterHeaderGuard=*/false);
+ }
+ if (SkippingUntilPCHThroughHeader &&
+ II->getPPKeywordID() == tok::pp_include) {
+ return HandleIncludeDirective(HashLoc, Result);
+ }
+ if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
+ Lex(Result);
+ auto *II = Result.getIdentifierInfo();
+ if (II && II->getName() == "hdrstop")
+ return HandlePragmaHdrstop(Result);
+ }
+ }
+ DiscardUntilEndOfDirective();
+}
+
+/// HandleDirective - This callback is invoked when the lexer sees a # token
+/// at the start of a line. This consumes the directive, modifies the
+/// lexer/preprocessor state, and advances the lexer(s) so that the next token
+/// read is the correct one.
+void Preprocessor::HandleDirective(Token &Result) {
+ // FIXME: Traditional: # with whitespace before it not recognized by K&R?
+
+ // We just parsed a # character at the start of a line, so we're in directive
+ // mode. Tell the lexer this so any newlines we see will be converted into an
+ // EOD token (which terminates the directive).
+ CurPPLexer->ParsingPreprocessorDirective = true;
+ if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
+
+ bool ImmediatelyAfterTopLevelIfndef =
+ CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
+ CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
+
+ ++NumDirectives;
+
+ // We are about to read a token. For the multiple-include optimization FA to
+ // work, we have to remember if we had read any tokens *before* this
+ // pp-directive.
+ bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
+
+ // Save the '#' token in case we need to return it later.
+ Token SavedHash = Result;
+
+ // Read the next token, the directive flavor. This isn't expanded due to
+ // C99 6.10.3p8.
+ LexUnexpandedToken(Result);
+
+ // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
+ // #define A(x) #x
+ // A(abc
+ // #warning blah
+ // def)
+ // If so, the user is relying on undefined behavior, emit a diagnostic. Do
+ // not support this for #include-like directives, since that can result in
+ // terrible diagnostics, and does not work in GCC.
+ if (InMacroArgs) {
+ if (IdentifierInfo *II = Result.getIdentifierInfo()) {
+ switch (II->getPPKeywordID()) {
+ case tok::pp_include:
+ case tok::pp_import:
+ case tok::pp_include_next:
+ case tok::pp___include_macros:
+ case tok::pp_pragma:
+ Diag(Result, diag::err_embedded_directive) << II->getName();
+ Diag(*ArgMacro, diag::note_macro_expansion_here)
+ << ArgMacro->getIdentifierInfo();
+ DiscardUntilEndOfDirective();
+ return;
+ default:
+ break;
+ }
+ }
+ Diag(Result, diag::ext_embedded_directive);
+ }
+
+ // Temporarily enable macro expansion if set so
+ // and reset to previous state when returning from this function.
+ ResetMacroExpansionHelper helper(this);
+
+ if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
+ return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
+
+ switch (Result.getKind()) {
+ case tok::eod:
+ return; // null directive.
+ case tok::code_completion:
+ if (CodeComplete)
+ CodeComplete->CodeCompleteDirective(
+ CurPPLexer->getConditionalStackDepth() > 0);
+ setCodeCompletionReached();
+ return;
+ case tok::numeric_constant: // # 7 GNU line marker directive.
+ if (getLangOpts().AsmPreprocessor)
+ break; // # 4 is not a preprocessor directive in .S files.
+ return HandleDigitDirective(Result);
+ default:
+ IdentifierInfo *II = Result.getIdentifierInfo();
+ if (!II) break; // Not an identifier.
+
+ // Ask what the preprocessor keyword ID is.
+ switch (II->getPPKeywordID()) {
+ default: break;
+ // C99 6.10.1 - Conditional Inclusion.
+ case tok::pp_if:
+ return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
+ case tok::pp_ifdef:
+ return HandleIfdefDirective(Result, SavedHash, false,
+ true /*not valid for miopt*/);
+ case tok::pp_ifndef:
+ return HandleIfdefDirective(Result, SavedHash, true,
+ ReadAnyTokensBeforeDirective);
+ case tok::pp_elif:
+ return HandleElifDirective(Result, SavedHash);
+ case tok::pp_else:
+ return HandleElseDirective(Result, SavedHash);
+ case tok::pp_endif:
+ return HandleEndifDirective(Result);
+
+ // C99 6.10.2 - Source File Inclusion.
+ case tok::pp_include:
+ // Handle #include.
+ return HandleIncludeDirective(SavedHash.getLocation(), Result);
+ case tok::pp___include_macros:
+ // Handle -imacros.
+ return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
+
+ // C99 6.10.3 - Macro Replacement.
+ case tok::pp_define:
+ return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
+ case tok::pp_undef:
+ return HandleUndefDirective();
+
+ // C99 6.10.4 - Line Control.
+ case tok::pp_line:
+ return HandleLineDirective();
+
+ // C99 6.10.5 - Error Directive.
+ case tok::pp_error:
+ return HandleUserDiagnosticDirective(Result, false);
+
+ // C99 6.10.6 - Pragma Directive.
+ case tok::pp_pragma:
+ return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
+
+ // GNU Extensions.
+ case tok::pp_import:
+ return HandleImportDirective(SavedHash.getLocation(), Result);
+ case tok::pp_include_next:
+ return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
+
+ case tok::pp_warning:
+ Diag(Result, diag::ext_pp_warning_directive);
+ return HandleUserDiagnosticDirective(Result, true);
+ case tok::pp_ident:
+ return HandleIdentSCCSDirective(Result);
+ case tok::pp_sccs:
+ return HandleIdentSCCSDirective(Result);
+ case tok::pp_assert:
+ //isExtension = true; // FIXME: implement #assert
+ break;
+ case tok::pp_unassert:
+ //isExtension = true; // FIXME: implement #unassert
+ break;
+
+ case tok::pp___public_macro:
+ if (getLangOpts().Modules)
+ return HandleMacroPublicDirective(Result);
+ break;
+
+ case tok::pp___private_macro:
+ if (getLangOpts().Modules)
+ return HandleMacroPrivateDirective();
+ break;
+ }
+ break;
+ }
+
+ // If this is a .S file, treat unknown # directives as non-preprocessor
+ // directives. This is important because # may be a comment or introduce
+ // various pseudo-ops. Just return the # token and push back the following
+ // token to be lexed next time.
+ if (getLangOpts().AsmPreprocessor) {
+ auto Toks = std::make_unique<Token[]>(2);
+ // Return the # and the token after it.
+ Toks[0] = SavedHash;
+ Toks[1] = Result;
+
+ // If the second token is a hashhash token, then we need to translate it to
+ // unknown so the token lexer doesn't try to perform token pasting.
+ if (Result.is(tok::hashhash))
+ Toks[1].setKind(tok::unknown);
+
+ // Enter this token stream so that we re-lex the tokens. Make sure to
+ // enable macro expansion, in case the token after the # is an identifier
+ // that is expanded.
+ EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
+ return;
+ }
+
+ // If we reached here, the preprocessing token is not valid!
+ Diag(Result, diag::err_pp_invalid_directive);
+
+ // Read the rest of the PP line.
+ DiscardUntilEndOfDirective();
+
+ // Okay, we're done parsing the directive.
+}
+
+/// GetLineValue - Convert a numeric token into an unsigned value, emitting
+/// Diagnostic DiagID if it is invalid, and returning the value in Val.
+static bool GetLineValue(Token &DigitTok, unsigned &Val,
+ unsigned DiagID, Preprocessor &PP,
+ bool IsGNULineDirective=false) {
+ if (DigitTok.isNot(tok::numeric_constant)) {
+ PP.Diag(DigitTok, DiagID);
+
+ if (DigitTok.isNot(tok::eod))
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+
+ SmallString<64> IntegerBuffer;
+ IntegerBuffer.resize(DigitTok.getLength());
+ const char *DigitTokBegin = &IntegerBuffer[0];
+ bool Invalid = false;
+ unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
+ if (Invalid)
+ return true;
+
+ // Verify that we have a simple digit-sequence, and compute the value. This
+ // is always a simple digit string computed in decimal, so we do this manually
+ // here.
+ Val = 0;
+ for (unsigned i = 0; i != ActualLength; ++i) {
+ // C++1y [lex.fcon]p1:
+ // Optional separating single quotes in a digit-sequence are ignored
+ if (DigitTokBegin[i] == '\'')
+ continue;
+
+ if (!isDigit(DigitTokBegin[i])) {
+ PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
+ diag::err_pp_line_digit_sequence) << IsGNULineDirective;
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+
+ unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
+ if (NextVal < Val) { // overflow.
+ PP.Diag(DigitTok, DiagID);
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+ Val = NextVal;
+ }
+
+ if (DigitTokBegin[0] == '0' && Val)
+ PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
+ << IsGNULineDirective;
+
+ return false;
+}
+
+/// Handle a \#line directive: C99 6.10.4.
+///
+/// The two acceptable forms are:
+/// \verbatim
+/// # line digit-sequence
+/// # line digit-sequence "s-char-sequence"
+/// \endverbatim
+void Preprocessor::HandleLineDirective() {
+ // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
+ // expanded.
+ Token DigitTok;
+ Lex(DigitTok);
+
+ // Validate the number and convert it to an unsigned.
+ unsigned LineNo;
+ if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
+ return;
+
+ if (LineNo == 0)
+ Diag(DigitTok, diag::ext_pp_line_zero);
+
+ // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
+ // number greater than 2147483647". C90 requires that the line # be <= 32767.
+ unsigned LineLimit = 32768U;
+ if (LangOpts.C99 || LangOpts.CPlusPlus11)
+ LineLimit = 2147483648U;
+ if (LineNo >= LineLimit)
+ Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
+ else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
+ Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
+
+ int FilenameID = -1;
+ Token StrTok;
+ Lex(StrTok);
+
+ // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
+ // string followed by eod.
+ if (StrTok.is(tok::eod))
+ ; // ok
+ else if (StrTok.isNot(tok::string_literal)) {
+ Diag(StrTok, diag::err_pp_line_invalid_filename);
+ DiscardUntilEndOfDirective();
+ return;
+ } else if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ DiscardUntilEndOfDirective();
+ return;
+ } else {
+ // Parse and validate the string, converting it into a unique ID.
+ StringLiteralParser Literal(StrTok, *this);
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
+ if (Literal.hadError) {
+ DiscardUntilEndOfDirective();
+ return;
+ }
+ if (Literal.Pascal) {
+ Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+ DiscardUntilEndOfDirective();
+ return;
+ }
+ FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
+
+ // Verify that there is nothing after the string, other than EOD. Because
+ // of C99 6.10.4p5, macros that expand to empty tokens are ok.
+ CheckEndOfDirective("line", true);
+ }
+
+ // Take the file kind of the file containing the #line directive. #line
+ // directives are often used for generated sources from the same codebase, so
+ // the new file should generally be classified the same way as the current
+ // file. This is visible in GCC's pre-processed output, which rewrites #line
+ // to GNU line markers.
+ SrcMgr::CharacteristicKind FileKind =
+ SourceMgr.getFileCharacteristic(DigitTok.getLocation());
+
+ SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
+ false, FileKind);
+
+ if (Callbacks)
+ Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
+ PPCallbacks::RenameFile, FileKind);
+}
+
+/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
+/// marker directive.
+static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
+ SrcMgr::CharacteristicKind &FileKind,
+ Preprocessor &PP) {
+ unsigned FlagVal;
+ Token FlagTok;
+ PP.Lex(FlagTok);
+ if (FlagTok.is(tok::eod)) return false;
+ if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
+ return true;
+
+ if (FlagVal == 1) {
+ IsFileEntry = true;
+
+ PP.Lex(FlagTok);
+ if (FlagTok.is(tok::eod)) return false;
+ if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
+ return true;
+ } else if (FlagVal == 2) {
+ IsFileExit = true;
+
+ SourceManager &SM = PP.getSourceManager();
+ // If we are leaving the current presumed file, check to make sure the
+ // presumed include stack isn't empty!
+ FileID CurFileID =
+ SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
+ PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
+ if (PLoc.isInvalid())
+ return true;
+
+ // If there is no include loc (main file) or if the include loc is in a
+ // different physical file, then we aren't in a "1" line marker flag region.
+ SourceLocation IncLoc = PLoc.getIncludeLoc();
+ if (IncLoc.isInvalid() ||
+ SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
+ PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+
+ PP.Lex(FlagTok);
+ if (FlagTok.is(tok::eod)) return false;
+ if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
+ return true;
+ }
+
+ // We must have 3 if there are still flags.
+ if (FlagVal != 3) {
+ PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+
+ FileKind = SrcMgr::C_System;
+
+ PP.Lex(FlagTok);
+ if (FlagTok.is(tok::eod)) return false;
+ if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
+ return true;
+
+ // We must have 4 if there is yet another flag.
+ if (FlagVal != 4) {
+ PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+ PP.DiscardUntilEndOfDirective();
+ return true;
+ }
+
+ FileKind = SrcMgr::C_ExternCSystem;
+
+ PP.Lex(FlagTok);
+ if (FlagTok.is(tok::eod)) return false;
+
+ // There are no more valid flags here.
+ PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+ PP.DiscardUntilEndOfDirective();
+ return true;
+}
+
+/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
+/// one of the following forms:
+///
+/// # 42
+/// # 42 "file" ('1' | '2')?
+/// # 42 "file" ('1' | '2')? '3' '4'?
+///
+void Preprocessor::HandleDigitDirective(Token &DigitTok) {
+ // Validate the number and convert it to an unsigned. GNU does not have a
+ // line # limit other than it fit in 32-bits.
+ unsigned LineNo;
+ if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
+ *this, true))
+ return;
+
+ Token StrTok;
+ Lex(StrTok);
+
+ bool IsFileEntry = false, IsFileExit = false;
+ int FilenameID = -1;
+ SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+
+ // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
+ // string followed by eod.
+ if (StrTok.is(tok::eod)) {
+ // Treat this like "#line NN", which doesn't change file characteristics.
+ FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
+ } else if (StrTok.isNot(tok::string_literal)) {
+ Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+ DiscardUntilEndOfDirective();
+ return;
+ } else if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ DiscardUntilEndOfDirective();
+ return;
+ } else {
+ // Parse and validate the string, converting it into a unique ID.
+ StringLiteralParser Literal(StrTok, *this);
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
+ if (Literal.hadError) {
+ DiscardUntilEndOfDirective();
+ return;
+ }
+ if (Literal.Pascal) {
+ Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+ DiscardUntilEndOfDirective();
+ return;
+ }
+ FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
+
+ // If a filename was present, read any flags that are present.
+ if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
+ return;
+ }
+
+ // Create a line note with this information.
+ SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
+ IsFileExit, FileKind);
+
+ // If the preprocessor has callbacks installed, notify them of the #line
+ // change. This is used so that the line marker comes out in -E mode for
+ // example.
+ if (Callbacks) {
+ PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
+ if (IsFileEntry)
+ Reason = PPCallbacks::EnterFile;
+ else if (IsFileExit)
+ Reason = PPCallbacks::ExitFile;
+
+ Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
+ }
+}
+
+/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
+///
+void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
+ bool isWarning) {
+ // Read the rest of the line raw. We do this because we don't want macros
+ // to be expanded and we don't require that the tokens be valid preprocessing
+ // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
+ // collapse multiple consecutive white space between tokens, but this isn't
+ // specified by the standard.
+ SmallString<128> Message;
+ CurLexer->ReadToEndOfLine(&Message);
+
+ // Find the first non-whitespace character, so that we can make the
+ // diagnostic more succinct.
+ StringRef Msg = StringRef(Message).ltrim(' ');
+
+ if (isWarning)
+ Diag(Tok, diag::pp_hash_warning) << Msg;
+ else
+ Diag(Tok, diag::err_pp_hash_error) << Msg;
+}
+
+/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
+///
+void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
+ // Yes, this directive is an extension.
+ Diag(Tok, diag::ext_pp_ident_directive);
+
+ // Read the string argument.
+ Token StrTok;
+ Lex(StrTok);
+
+ // If the token kind isn't a string, it's a malformed directive.
+ if (StrTok.isNot(tok::string_literal) &&
+ StrTok.isNot(tok::wide_string_literal)) {
+ Diag(StrTok, diag::err_pp_malformed_ident);
+ if (StrTok.isNot(tok::eod))
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ // Verify that there is nothing after the string, other than EOD.
+ CheckEndOfDirective("ident");
+
+ if (Callbacks) {
+ bool Invalid = false;
+ std::string Str = getSpelling(StrTok, &Invalid);
+ if (!Invalid)
+ Callbacks->Ident(Tok.getLocation(), Str);
+ }
+}
+
+/// Handle a #public directive.
+void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, MU_Undef);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod))
+ return;
+
+ // Check to see if this is the last token on the #__public_macro line.
+ CheckEndOfDirective("__public_macro");
+
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+ // Okay, we finally have a valid identifier to undef.
+ MacroDirective *MD = getLocalMacroDirective(II);
+
+ // If the macro is not defined, this is an error.
+ if (!MD) {
+ Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
+ return;
+ }
+
+ // Note that this macro has now been exported.
+ appendMacroDirective(II, AllocateVisibilityMacroDirective(
+ MacroNameTok.getLocation(), /*isPublic=*/true));
+}
+
+/// Handle a #private directive.
+void Preprocessor::HandleMacroPrivateDirective() {
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, MU_Undef);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod))
+ return;
+
+ // Check to see if this is the last token on the #__private_macro line.
+ CheckEndOfDirective("__private_macro");
+
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+ // Okay, we finally have a valid identifier to undef.
+ MacroDirective *MD = getLocalMacroDirective(II);
+
+ // If the macro is not defined, this is an error.
+ if (!MD) {
+ Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
+ return;
+ }
+
+ // Note that this macro has now been marked private.
+ appendMacroDirective(II, AllocateVisibilityMacroDirective(
+ MacroNameTok.getLocation(), /*isPublic=*/false));
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Include Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
+/// checked and spelled filename, e.g. as an operand of \#include. This returns
+/// true if the input filename was in <>'s or false if it were in ""'s. The
+/// caller is expected to provide a buffer that is large enough to hold the
+/// spelling of the filename, but is also expected to handle the case when
+/// this method decides to use a different buffer.
+bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
+ StringRef &Buffer) {
+ // Get the text form of the filename.
+ assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
+
+ // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
+ // C++20 [lex.header]/2:
+ //
+ // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
+ // in C: behavior is undefined
+ // in C++: program is conditionally-supported with implementation-defined
+ // semantics
+
+ // Make sure the filename is <x> or "x".
+ bool isAngled;
+ if (Buffer[0] == '<') {
+ if (Buffer.back() != '>') {
+ Diag(Loc, diag::err_pp_expects_filename);
+ Buffer = StringRef();
+ return true;
+ }
+ isAngled = true;
+ } else if (Buffer[0] == '"') {
+ if (Buffer.back() != '"') {
+ Diag(Loc, diag::err_pp_expects_filename);
+ Buffer = StringRef();
+ return true;
+ }
+ isAngled = false;
+ } else {
+ Diag(Loc, diag::err_pp_expects_filename);
+ Buffer = StringRef();
+ return true;
+ }
+
+ // Diagnose #include "" as invalid.
+ if (Buffer.size() <= 2) {
+ Diag(Loc, diag::err_pp_empty_filename);
+ Buffer = StringRef();
+ return true;
+ }
+
+ // Skip the brackets.
+ Buffer = Buffer.substr(1, Buffer.size()-2);
+ return isAngled;
+}
+
+/// Push a token onto the token stream containing an annotation.
+void Preprocessor::EnterAnnotationToken(SourceRange Range,
+ tok::TokenKind Kind,
+ void *AnnotationVal) {
+ // FIXME: Produce this as the current token directly, rather than
+ // allocating a new token for it.
+ auto Tok = std::make_unique<Token[]>(1);
+ Tok[0].startToken();
+ Tok[0].setKind(Kind);
+ Tok[0].setLocation(Range.getBegin());
+ Tok[0].setAnnotationEndLoc(Range.getEnd());
+ Tok[0].setAnnotationValue(AnnotationVal);
+ EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
+}
+
+/// Produce a diagnostic informing the user that a #include or similar
+/// was implicitly treated as a module import.
+static void diagnoseAutoModuleImport(
+ Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
+ ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
+ SourceLocation PathEnd) {
+ StringRef ImportKeyword;
+ if (PP.getLangOpts().ObjC)
+ ImportKeyword = "@import";
+ else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules)
+ ImportKeyword = "import";
+ else
+ return; // no import syntax available
+
+ SmallString<128> PathString;
+ for (size_t I = 0, N = Path.size(); I != N; ++I) {
+ if (I)
+ PathString += '.';
+ PathString += Path[I].first->getName();
+ }
+ int IncludeKind = 0;
+
+ switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_include:
+ IncludeKind = 0;
+ break;
+
+ case tok::pp_import:
+ IncludeKind = 1;
+ break;
+
+ case tok::pp_include_next:
+ IncludeKind = 2;
+ break;
+
+ case tok::pp___include_macros:
+ IncludeKind = 3;
+ break;
+
+ default:
+ llvm_unreachable("unknown include directive kind");
+ }
+
+ CharSourceRange ReplaceRange(SourceRange(HashLoc, PathEnd),
+ /*IsTokenRange=*/false);
+ PP.Diag(HashLoc, diag::warn_auto_module_import)
+ << IncludeKind << PathString
+ << FixItHint::CreateReplacement(
+ ReplaceRange, (ImportKeyword + " " + PathString + ";").str());
+}
+
+// Given a vector of path components and a string containing the real
+// path to the file, build a properly-cased replacement in the vector,
+// and return true if the replacement should be suggested.
+static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
+ StringRef RealPathName) {
+ auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
+ auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
+ int Cnt = 0;
+ bool SuggestReplacement = false;
+ // Below is a best-effort to handle ".." in paths. It is admittedly
+ // not 100% correct in the presence of symlinks.
+ for (auto &Component : llvm::reverse(Components)) {
+ if ("." == Component) {
+ } else if (".." == Component) {
+ ++Cnt;
+ } else if (Cnt) {
+ --Cnt;
+ } else if (RealPathComponentIter != RealPathComponentEnd) {
+ if (Component != *RealPathComponentIter) {
+ // If these path components differ by more than just case, then we
+ // may be looking at symlinked paths. Bail on this diagnostic to avoid
+ // noisy false positives.
+ SuggestReplacement = RealPathComponentIter->equals_lower(Component);
+ if (!SuggestReplacement)
+ break;
+ Component = *RealPathComponentIter;
+ }
+ ++RealPathComponentIter;
+ }
+ }
+ return SuggestReplacement;
+}
+
+bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
+ const TargetInfo &TargetInfo,
+ DiagnosticsEngine &Diags, Module *M) {
+ Module::Requirement Requirement;
+ Module::UnresolvedHeaderDirective MissingHeader;
+ Module *ShadowingModule = nullptr;
+ if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
+ ShadowingModule))
+ return false;
+
+ if (MissingHeader.FileNameLoc.isValid()) {
+ Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
+ << MissingHeader.IsUmbrella << MissingHeader.FileName;
+ } else if (ShadowingModule) {
+ Diags.Report(M->DefinitionLoc, diag::err_module_shadowed) << M->Name;
+ Diags.Report(ShadowingModule->DefinitionLoc,
+ diag::note_previous_definition);
+ } else {
+ // FIXME: Track the location at which the requirement was specified, and
+ // use it here.
+ Diags.Report(M->DefinitionLoc, diag::err_module_unavailable)
+ << M->getFullModuleName() << Requirement.second << Requirement.first;
+ }
+ return true;
+}
+
+/// HandleIncludeDirective - The "\#include" tokens have just been read, read
+/// the file to be included from the lexer, then include it! This is a common
+/// routine with functionality shared between \#include, \#include_next and
+/// \#import. LookupFrom is set when this is a \#include_next directive, it
+/// specifies the file to start searching from.
+void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
+ Token &IncludeTok,
+ const DirectoryLookup *LookupFrom,
+ const FileEntry *LookupFromFile) {
+ Token FilenameTok;
+ if (LexHeaderName(FilenameTok))
+ return;
+
+ if (FilenameTok.isNot(tok::header_name)) {
+ Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+ if (FilenameTok.isNot(tok::eod))
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ // Verify that there is nothing after the filename, other than EOD. Note
+ // that we allow macros that expand to nothing after the filename, because
+ // this falls into the category of "#include pp-tokens new-line" specified
+ // in C99 6.10.2p4.
+ SourceLocation EndLoc =
+ CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
+
+ auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
+ EndLoc, LookupFrom, LookupFromFile);
+ switch (Action.Kind) {
+ case ImportAction::None:
+ case ImportAction::SkippedModuleImport:
+ break;
+ case ImportAction::ModuleBegin:
+ EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
+ tok::annot_module_begin, Action.ModuleForHeader);
+ break;
+ case ImportAction::ModuleImport:
+ EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
+ tok::annot_module_include, Action.ModuleForHeader);
+ break;
+ }
+}
+
+Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(
+ const DirectoryLookup *&CurDir, StringRef Filename,
+ SourceLocation FilenameLoc, CharSourceRange FilenameRange,
+ const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
+ bool &IsMapped, const DirectoryLookup *LookupFrom,
+ const FileEntry *LookupFromFile, StringRef LookupFilename,
+ SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
+ ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
+ Optional<FileEntryRef> File = LookupFile(
+ FilenameLoc, LookupFilename,
+ isAngled, LookupFrom, LookupFromFile, CurDir,
+ Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
+ &SuggestedModule, &IsMapped, &IsFrameworkFound);
+ if (File)
+ return File;
+
+ if (Callbacks) {
+ // Give the clients a chance to recover.
+ SmallString<128> RecoveryPath;
+ if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
+ if (auto DE = FileMgr.getOptionalDirectoryRef(RecoveryPath)) {
+ // Add the recovery path to the list of search paths.
+ DirectoryLookup DL(*DE, SrcMgr::C_User, false);
+ HeaderInfo.AddSearchPath(DL, isAngled);
+
+ // Try the lookup again, skipping the cache.
+ Optional<FileEntryRef> File = LookupFile(
+ FilenameLoc,
+ LookupFilename, isAngled,
+ LookupFrom, LookupFromFile, CurDir, nullptr, nullptr,
+ &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr,
+ /*SkipCache*/ true);
+ if (File)
+ return File;
+ }
+ }
+ }
+
+ if (SuppressIncludeNotFoundError)
+ return None;
+
+ // If the file could not be located and it was included via angle
+ // brackets, we can attempt a lookup as though it were a quoted path to
+ // provide the user with a possible fixit.
+ if (isAngled) {
+ Optional<FileEntryRef> File = LookupFile(
+ FilenameLoc, LookupFilename,
+ false, LookupFrom, LookupFromFile, CurDir,
+ Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
+ &SuggestedModule, &IsMapped,
+ /*IsFrameworkFound=*/nullptr);
+ if (File) {
+ Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
+ << Filename << IsImportDecl
+ << FixItHint::CreateReplacement(FilenameRange,
+ "\"" + Filename.str() + "\"");
+ return File;
+ }
+ }
+
+ // Check for likely typos due to leading or trailing non-isAlphanumeric
+ // characters
+ StringRef OriginalFilename = Filename;
+ if (LangOpts.SpellChecking) {
+ // A heuristic to correct a typo file name by removing leading and
+ // trailing non-isAlphanumeric characters.
+ auto CorrectTypoFilename = [](llvm::StringRef Filename) {
+ Filename = Filename.drop_until(isAlphanumeric);
+ while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
+ Filename = Filename.drop_back();
+ }
+ return Filename;
+ };
+ StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
+
+#ifndef _WIN32
+ // Normalize slashes when compiling with -fms-extensions on non-Windows.
+ // This is unnecessary on Windows since the filesystem there handles
+ // backslashes.
+ SmallString<128> NormalizedTypoCorrectionPath;
+ if (LangOpts.MicrosoftExt) {
+ NormalizedTypoCorrectionPath = TypoCorrectionName;
+ llvm::sys::path::native(NormalizedTypoCorrectionPath);
+ TypoCorrectionName = NormalizedTypoCorrectionPath;
+ }
+#endif
+
+ Optional<FileEntryRef> File = LookupFile(
+ FilenameLoc, TypoCorrectionName, isAngled, LookupFrom, LookupFromFile,
+ CurDir, Callbacks ? &SearchPath : nullptr,
+ Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
+ /*IsFrameworkFound=*/nullptr);
+ if (File) {
+ auto Hint =
+ isAngled ? FixItHint::CreateReplacement(
+ FilenameRange, "<" + TypoCorrectionName.str() + ">")
+ : FixItHint::CreateReplacement(
+ FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
+ Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
+ << OriginalFilename << TypoCorrectionName << Hint;
+ // We found the file, so set the Filename to the name after typo
+ // correction.
+ Filename = TypoCorrectionName;
+ return File;
+ }
+ }
+
+ // If the file is still not found, just go with the vanilla diagnostic
+ assert(!File.hasValue() && "expected missing file");
+ Diag(FilenameTok, diag::err_pp_file_not_found)
+ << OriginalFilename << FilenameRange;
+ if (IsFrameworkFound) {
+ size_t SlashPos = OriginalFilename.find('/');
+ assert(SlashPos != StringRef::npos &&
+ "Include with framework name should have '/' in the filename");
+ StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
+ FrameworkCacheEntry &CacheEntry =
+ HeaderInfo.LookupFrameworkCache(FrameworkName);
+ assert(CacheEntry.Directory && "Found framework should be in cache");
+ Diag(FilenameTok, diag::note_pp_framework_without_header)
+ << OriginalFilename.substr(SlashPos + 1) << FrameworkName
+ << CacheEntry.Directory->getName();
+ }
+
+ return None;
+}
+
+/// Handle either a #include-like directive or an import declaration that names
+/// a header file.
+///
+/// \param HashLoc The location of the '#' token for an include, or
+/// SourceLocation() for an import declaration.
+/// \param IncludeTok The include / include_next / import token.
+/// \param FilenameTok The header-name token.
+/// \param EndLoc The location at which any imported macros become visible.
+/// \param LookupFrom For #include_next, the starting directory for the
+/// directory lookup.
+/// \param LookupFromFile For #include_next, the starting file for the directory
+/// lookup.
+Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
+ SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
+ SourceLocation EndLoc, const DirectoryLookup *LookupFrom,
+ const FileEntry *LookupFromFile) {
+ SmallString<128> FilenameBuffer;
+ StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+ SourceLocation CharEnd = FilenameTok.getEndLoc();
+
+ CharSourceRange FilenameRange
+ = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
+ StringRef OriginalFilename = Filename;
+ bool isAngled =
+ GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (Filename.empty())
+ return {ImportAction::None};
+
+ bool IsImportDecl = HashLoc.isInvalid();
+ SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
+
+ // Complain about attempts to #include files in an audit pragma.
+ if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
+ Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
+ Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
+
+ // Immediately leave the pragma.
+ PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
+ }
+
+ // Complain about attempts to #include files in an assume-nonnull pragma.
+ if (PragmaAssumeNonNullLoc.isValid()) {
+ Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
+ Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
+
+ // Immediately leave the pragma.
+ PragmaAssumeNonNullLoc = SourceLocation();
+ }
+
+ if (HeaderInfo.HasIncludeAliasMap()) {
+ // Map the filename with the brackets still attached. If the name doesn't
+ // map to anything, fall back on the filename we've already gotten the
+ // spelling for.
+ StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
+ if (!NewName.empty())
+ Filename = NewName;
+ }
+
+ // Search include directories.
+ bool IsMapped = false;
+ bool IsFrameworkFound = false;
+ const DirectoryLookup *CurDir;
+ SmallString<1024> SearchPath;
+ SmallString<1024> RelativePath;
+ // We get the raw path only if we have 'Callbacks' to which we later pass
+ // the path.
+ ModuleMap::KnownHeader SuggestedModule;
+ SourceLocation FilenameLoc = FilenameTok.getLocation();
+ StringRef LookupFilename = Filename;
+
+#ifndef _WIN32
+ // Normalize slashes when compiling with -fms-extensions on non-Windows. This
+ // is unnecessary on Windows since the filesystem there handles backslashes.
+ SmallString<128> NormalizedPath;
+ if (LangOpts.MicrosoftExt) {
+ NormalizedPath = Filename.str();
+ llvm::sys::path::native(NormalizedPath);
+ LookupFilename = NormalizedPath;
+ }
+#endif
+
+ Optional<FileEntryRef> File = LookupHeaderIncludeOrImport(
+ CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
+ IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
+ LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
+
+ if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
+ if (File && isPCHThroughHeader(&File->getFileEntry()))
+ SkippingUntilPCHThroughHeader = false;
+ return {ImportAction::None};
+ }
+
+ // Check for circular inclusion of the main file.
+ // We can't generate a consistent preamble with regard to the conditional
+ // stack if the main file is included again as due to the preamble bounds
+ // some directives (e.g. #endif of a header guard) will never be seen.
+ // Since this will lead to confusing errors, avoid the inclusion.
+ if (File && PreambleConditionalStack.isRecording() &&
+ SourceMgr.translateFile(&File->getFileEntry()) ==
+ SourceMgr.getMainFileID()) {
+ Diag(FilenameTok.getLocation(),
+ diag::err_pp_including_mainfile_in_preamble);
+ return {ImportAction::None};
+ }
+
+ // Should we enter the source file? Set to Skip if either the source file is
+ // known to have no effect beyond its effect on module visibility -- that is,
+ // if it's got an include guard that is already defined, set to Import if it
+ // is a modular header we've already built and should import.
+ enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
+
+ if (PPOpts->SingleFileParseMode)
+ Action = IncludeLimitReached;
+
+ // If we've reached the max allowed include depth, it is usually due to an
+ // include cycle. Don't enter already processed files again as it can lead to
+ // reaching the max allowed include depth again.
+ if (Action == Enter && HasReachedMaxIncludeDepth && File &&
+ HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
+ Action = IncludeLimitReached;
+
+ // Determine whether we should try to import the module for this #include, if
+ // there is one. Don't do so if precompiled module support is disabled or we
+ // are processing this module textually (because we're building the module).
+ if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
+ !isForModuleBuilding(SuggestedModule.getModule(),
+ getLangOpts().CurrentModule,
+ getLangOpts().ModuleName)) {
+ // If this include corresponds to a module but that module is
+ // unavailable, diagnose the situation and bail out.
+ // FIXME: Remove this; loadModule does the same check (but produces
+ // slightly worse diagnostics).
+ if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), getDiagnostics(),
+ SuggestedModule.getModule())) {
+ Diag(FilenameTok.getLocation(),
+ diag::note_implicit_top_level_module_import_here)
+ << SuggestedModule.getModule()->getTopLevelModuleName();
+ return {ImportAction::None};
+ }
+
+ // Compute the module access path corresponding to this module.
+ // FIXME: Should we have a second loadModule() overload to avoid this
+ // extra lookup step?
+ SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
+ for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
+ Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
+ FilenameTok.getLocation()));
+ std::reverse(Path.begin(), Path.end());
+
+ // Warn that we're replacing the include/import with a module import.
+ if (!IsImportDecl)
+ diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
+
+ // Load the module to import its macros. We'll make the declarations
+ // visible when the parser gets here.
+ // FIXME: Pass SuggestedModule in here rather than converting it to a path
+ // and making the module loader convert it back again.
+ ModuleLoadResult Imported = TheModuleLoader.loadModule(
+ IncludeTok.getLocation(), Path, Module::Hidden,
+ /*IsInclusionDirective=*/true);
+ assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
+ "the imported module is different than the suggested one");
+
+ if (Imported) {
+ Action = Import;
+ } else if (Imported.isMissingExpected()) {
+ // We failed to find a submodule that we assumed would exist (because it
+ // was in the directory of an umbrella header, for instance), but no
+ // actual module containing it exists (because the umbrella header is
+ // incomplete). Treat this as a textual inclusion.
+ SuggestedModule = ModuleMap::KnownHeader();
+ } else if (Imported.isConfigMismatch()) {
+ // On a configuration mismatch, enter the header textually. We still know
+ // that it's part of the corresponding module.
+ } else {
+ // We hit an error processing the import. Bail out.
+ if (hadModuleLoaderFatalFailure()) {
+ // With a fatal failure in the module loader, we abort parsing.
+ Token &Result = IncludeTok;
+ assert(CurLexer && "#include but no current lexer set!");
+ Result.startToken();
+ CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
+ CurLexer->cutOffLexing();
+ }
+ return {ImportAction::None};
+ }
+ }
+
+ // The #included file will be considered to be a system header if either it is
+ // in a system include directory, or if the #includer is a system include
+ // header.
+ SrcMgr::CharacteristicKind FileCharacter =
+ SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
+ if (File)
+ FileCharacter = std::max(HeaderInfo.getFileDirFlavor(&File->getFileEntry()),
+ FileCharacter);
+
+ // If this is a '#import' or an import-declaration, don't re-enter the file.
+ //
+ // FIXME: If we have a suggested module for a '#include', and we've already
+ // visited this file, don't bother entering it again. We know it has no
+ // further effect.
+ bool EnterOnce =
+ IsImportDecl ||
+ IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
+
+ // Ask HeaderInfo if we should enter this #include file. If not, #including
+ // this file will have no effect.
+ if (Action == Enter && File &&
+ !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
+ EnterOnce, getLangOpts().Modules,
+ SuggestedModule.getModule())) {
+ // Even if we've already preprocessed this header once and know that we
+ // don't need to see its contents again, we still need to import it if it's
+ // modular because we might not have imported it from this submodule before.
+ //
+ // FIXME: We don't do this when compiling a PCH because the AST
+ // serialization layer can't cope with it. This means we get local
+ // submodule visibility semantics wrong in that case.
+ Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
+ }
+
+ if (Callbacks && !IsImportDecl) {
+ // Notify the callback object that we've seen an inclusion directive.
+ // FIXME: Use a different callback for a pp-import?
+ Callbacks->InclusionDirective(
+ HashLoc, IncludeTok, LookupFilename, isAngled, FilenameRange,
+ File ? &File->getFileEntry() : nullptr, SearchPath, RelativePath,
+ Action == Import ? SuggestedModule.getModule() : nullptr,
+ FileCharacter);
+ if (Action == Skip && File)
+ Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
+ }
+
+ if (!File)
+ return {ImportAction::None};
+
+ // If this is a C++20 pp-import declaration, diagnose if we didn't find any
+ // module corresponding to the named header.
+ if (IsImportDecl && !SuggestedModule) {
+ Diag(FilenameTok, diag::err_header_import_not_header_unit)
+ << OriginalFilename << File->getName();
+ return {ImportAction::None};
+ }
+
+ // Issue a diagnostic if the name of the file on disk has a different case
+ // than the one we're about to open.
+ const bool CheckIncludePathPortability =
+ !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
+
+ if (CheckIncludePathPortability) {
+ StringRef Name = LookupFilename;
+ StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
+ SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
+ llvm::sys::path::end(Name));
+
+ if (trySimplifyPath(Components, RealPathName)) {
+ SmallString<128> Path;
+ Path.reserve(Name.size()+2);
+ Path.push_back(isAngled ? '<' : '"');
+ bool isLeadingSeparator = llvm::sys::path::is_absolute(Name);
+ for (auto Component : Components) {
+ if (isLeadingSeparator)
+ isLeadingSeparator = false;
+ else
+ Path.append(Component);
+ // Append the separator the user used, or the close quote
+ Path.push_back(
+ Path.size() <= Filename.size() ? Filename[Path.size()-1] :
+ (isAngled ? '>' : '"'));
+ }
+ // For user files and known standard headers, by default we issue a diagnostic.
+ // For other system headers, we don't. They can be controlled separately.
+ auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ?
+ diag::pp_nonportable_path : diag::pp_nonportable_system_path;
+ Diag(FilenameTok, DiagId) << Path <<
+ FixItHint::CreateReplacement(FilenameRange, Path);
+ }
+ }
+
+ switch (Action) {
+ case Skip:
+ // If we don't need to enter the file, stop now.
+ if (Module *M = SuggestedModule.getModule())
+ return {ImportAction::SkippedModuleImport, M};
+ return {ImportAction::None};
+
+ case IncludeLimitReached:
+ // If we reached our include limit and don't want to enter any more files,
+ // don't go any further.
+ return {ImportAction::None};
+
+ case Import: {
+ // If this is a module import, make it visible if needed.
+ Module *M = SuggestedModule.getModule();
+ assert(M && "no module to import");
+
+ makeModuleVisible(M, EndLoc);
+
+ if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp___include_macros)
+ return {ImportAction::None};
+
+ return {ImportAction::ModuleImport, M};
+ }
+
+ case Enter:
+ break;
+ }
+
+ // Check that we don't have infinite #include recursion.
+ if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
+ Diag(FilenameTok, diag::err_pp_include_too_deep);
+ HasReachedMaxIncludeDepth = true;
+ return {ImportAction::None};
+ }
+
+ // Look up the file, create a File ID for it.
+ SourceLocation IncludePos = FilenameTok.getLocation();
+ // If the filename string was the result of macro expansions, set the include
+ // position on the file where it will be included and after the expansions.
+ if (IncludePos.isMacroID())
+ IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
+ FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
+ assert(FID.isValid() && "Expected valid file ID");
+
+ // If all is good, enter the new file!
+ if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
+ return {ImportAction::None};
+
+ // Determine if we're switching to building a new submodule, and which one.
+ if (auto *M = SuggestedModule.getModule()) {
+ if (M->getTopLevelModule()->ShadowingModule) {
+ // We are building a submodule that belongs to a shadowed module. This
+ // means we find header files in the shadowed module.
+ Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
+ << M->getFullModuleName();
+ Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
+ diag::note_previous_definition);
+ return {ImportAction::None};
+ }
+ // When building a pch, -fmodule-name tells the compiler to textually
+ // include headers in the specified module. We are not building the
+ // specified module.
+ //
+ // FIXME: This is the wrong way to handle this. We should produce a PCH
+ // that behaves the same as the header would behave in a compilation using
+ // that PCH, which means we should enter the submodule. We need to teach
+ // the AST serialization layer to deal with the resulting AST.
+ if (getLangOpts().CompilingPCH &&
+ isForModuleBuilding(M, getLangOpts().CurrentModule,
+ getLangOpts().ModuleName))
+ return {ImportAction::None};
+
+ assert(!CurLexerSubmodule && "should not have marked this as a module yet");
+ CurLexerSubmodule = M;
+
+ // Let the macro handling code know that any future macros are within
+ // the new submodule.
+ EnterSubmodule(M, EndLoc, /*ForPragma*/false);
+
+ // Let the parser know that any future declarations are within the new
+ // submodule.
+ // FIXME: There's no point doing this if we're handling a #__include_macros
+ // directive.
+ return {ImportAction::ModuleBegin, M};
+ }
+
+ assert(!IsImportDecl && "failed to diagnose missing module for import decl");
+ return {ImportAction::None};
+}
+
+/// HandleIncludeNextDirective - Implements \#include_next.
+///
+void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
+ Token &IncludeNextTok) {
+ Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
+
+ // #include_next is like #include, except that we start searching after
+ // the current found directory. If we can't do this, issue a
+ // diagnostic.
+ const DirectoryLookup *Lookup = CurDirLookup;
+ const FileEntry *LookupFromFile = nullptr;
+ if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
+ // If the main file is a header, then it's either for PCH/AST generation,
+ // or libclang opened it. Either way, handle it as a normal include below
+ // and do not complain about include_next.
+ } else if (isInPrimaryFile()) {
+ Lookup = nullptr;
+ Diag(IncludeNextTok, diag::pp_include_next_in_primary);
+ } else if (CurLexerSubmodule) {
+ // Start looking up in the directory *after* the one in which the current
+ // file would be found, if any.
+ assert(CurPPLexer && "#include_next directive in macro?");
+ LookupFromFile = CurPPLexer->getFileEntry();
+ Lookup = nullptr;
+ } else if (!Lookup) {
+ // The current file was not found by walking the include path. Either it
+ // is the primary file (handled above), or it was found by absolute path,
+ // or it was found relative to such a file.
+ // FIXME: Track enough information so we know which case we're in.
+ Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
+ } else {
+ // Start looking up in the next directory.
+ ++Lookup;
+ }
+
+ return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
+ LookupFromFile);
+}
+
+/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
+void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
+ // The Microsoft #import directive takes a type library and generates header
+ // files from it, and includes those. This is beyond the scope of what clang
+ // does, so we ignore it and error out. However, #import can optionally have
+ // trailing attributes that span multiple lines. We're going to eat those
+ // so we can continue processing from there.
+ Diag(Tok, diag::err_pp_import_directive_ms );
+
+ // Read tokens until we get to the end of the directive. Note that the
+ // directive can be split over multiple lines using the backslash character.
+ DiscardUntilEndOfDirective();
+}
+
+/// HandleImportDirective - Implements \#import.
+///
+void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
+ Token &ImportTok) {
+ if (!LangOpts.ObjC) { // #import is standard for ObjC.
+ if (LangOpts.MSVCCompat)
+ return HandleMicrosoftImportDirective(ImportTok);
+ Diag(ImportTok, diag::ext_pp_import_directive);
+ }
+ return HandleIncludeDirective(HashLoc, ImportTok);
+}
+
+/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
+/// pseudo directive in the predefines buffer. This handles it by sucking all
+/// tokens through the preprocessor and discarding them (only keeping the side
+/// effects on the preprocessor).
+void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
+ Token &IncludeMacrosTok) {
+ // This directive should only occur in the predefines buffer. If not, emit an
+ // error and reject it.
+ SourceLocation Loc = IncludeMacrosTok.getLocation();
+ if (SourceMgr.getBufferName(Loc) != "<built-in>") {
+ Diag(IncludeMacrosTok.getLocation(),
+ diag::pp_include_macros_out_of_predefines);
+ DiscardUntilEndOfDirective();
+ return;
+ }
+
+ // Treat this as a normal #include for checking purposes. If this is
+ // successful, it will push a new lexer onto the include stack.
+ HandleIncludeDirective(HashLoc, IncludeMacrosTok);
+
+ Token TmpTok;
+ do {
+ Lex(TmpTok);
+ assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
+ } while (TmpTok.isNot(tok::hashhash));
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Macro Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// ReadMacroParameterList - The ( starting a parameter list of a macro
+/// definition has just been read. Lex the rest of the parameters and the
+/// closing ), updating MI with what we learn. Return true if an error occurs
+/// parsing the param list.
+bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
+ SmallVector<IdentifierInfo*, 32> Parameters;
+
+ while (true) {
+ LexUnexpandedToken(Tok);
+ switch (Tok.getKind()) {
+ case tok::r_paren:
+ // Found the end of the parameter list.
+ if (Parameters.empty()) // #define FOO()
+ return false;
+ // Otherwise we have #define FOO(A,)
+ Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
+ return true;
+ case tok::ellipsis: // #define X(... -> C99 varargs
+ if (!LangOpts.C99)
+ Diag(Tok, LangOpts.CPlusPlus11 ?
+ diag::warn_cxx98_compat_variadic_macro :
+ diag::ext_variadic_macro);
+
+ // OpenCL v1.2 s6.9.e: variadic macros are not supported.
+ if (LangOpts.OpenCL) {
+ Diag(Tok, diag::ext_pp_opencl_variadic_macros);
+ }
+
+ // Lex the token after the identifier.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ }
+ // Add the __VA_ARGS__ identifier as a parameter.
+ Parameters.push_back(Ident__VA_ARGS__);
+ MI->setIsC99Varargs();
+ MI->setParameterList(Parameters, BP);
+ return false;
+ case tok::eod: // #define X(
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ default:
+ // Handle keywords and identifiers here to accept things like
+ // #define Foo(for) for.
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ if (!II) {
+ // #define X(1
+ Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
+ return true;
+ }
+
+ // If this is already used as a parameter, it is used multiple times (e.g.
+ // #define X(A,A.
+ if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6
+ Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
+ return true;
+ }
+
+ // Add the parameter to the macro info.
+ Parameters.push_back(II);
+
+ // Lex the token after the identifier.
+ LexUnexpandedToken(Tok);
+
+ switch (Tok.getKind()) {
+ default: // #define X(A B
+ Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
+ return true;
+ case tok::r_paren: // #define X(A)
+ MI->setParameterList(Parameters, BP);
+ return false;
+ case tok::comma: // #define X(A,
+ break;
+ case tok::ellipsis: // #define X(A... -> GCC extension
+ // Diagnose extension.
+ Diag(Tok, diag::ext_named_variadic_macro);
+
+ // Lex the token after the identifier.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+ return true;
+ }
+
+ MI->setIsGNUVarargs();
+ MI->setParameterList(Parameters, BP);
+ return false;
+ }
+ }
+ }
+}
+
+static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
+ const LangOptions &LOptions) {
+ if (MI->getNumTokens() == 1) {
+ const Token &Value = MI->getReplacementToken(0);
+
+ // Macro that is identity, like '#define inline inline' is a valid pattern.
+ if (MacroName.getKind() == Value.getKind())
+ return true;
+
+ // Macro that maps a keyword to the same keyword decorated with leading/
+ // trailing underscores is a valid pattern:
+ // #define inline __inline
+ // #define inline __inline__
+ // #define inline _inline (in MS compatibility mode)
+ StringRef MacroText = MacroName.getIdentifierInfo()->getName();
+ if (IdentifierInfo *II = Value.getIdentifierInfo()) {
+ if (!II->isKeyword(LOptions))
+ return false;
+ StringRef ValueText = II->getName();
+ StringRef TrimmedValue = ValueText;
+ if (!ValueText.startswith("__")) {
+ if (ValueText.startswith("_"))
+ TrimmedValue = TrimmedValue.drop_front(1);
+ else
+ return false;
+ } else {
+ TrimmedValue = TrimmedValue.drop_front(2);
+ if (TrimmedValue.endswith("__"))
+ TrimmedValue = TrimmedValue.drop_back(2);
+ }
+ return TrimmedValue.equals(MacroText);
+ } else {
+ return false;
+ }
+ }
+
+ // #define inline
+ return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
+ tok::kw_const) &&
+ MI->getNumTokens() == 0;
+}
+
+// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
+// entire line) of the macro's tokens and adds them to MacroInfo, and while
+// doing so performs certain validity checks including (but not limited to):
+// - # (stringization) is followed by a macro parameter
+//
+// Returns a nullptr if an invalid sequence of tokens is encountered or returns
+// a pointer to a MacroInfo object.
+
+MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
+ const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
+
+ Token LastTok = MacroNameTok;
+ // Create the new macro.
+ MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
+
+ Token Tok;
+ LexUnexpandedToken(Tok);
+
+ // Ensure we consume the rest of the macro body if errors occur.
+ auto _ = llvm::make_scope_exit([&]() {
+ // The flag indicates if we are still waiting for 'eod'.
+ if (CurLexer->ParsingPreprocessorDirective)
+ DiscardUntilEndOfDirective();
+ });
+
+ // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
+ // within their appropriate context.
+ VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
+
+ // If this is a function-like macro definition, parse the argument list,
+ // marking each of the identifiers as being used as macro arguments. Also,
+ // check other constraints on the first token of the macro body.
+ if (Tok.is(tok::eod)) {
+ if (ImmediatelyAfterHeaderGuard) {
+ // Save this macro information since it may part of a header guard.
+ CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
+ MacroNameTok.getLocation());
+ }
+ // If there is no body to this macro, we have no special handling here.
+ } else if (Tok.hasLeadingSpace()) {
+ // This is a normal token with leading space. Clear the leading space
+ // marker on the first token to get proper expansion.
+ Tok.clearFlag(Token::LeadingSpace);
+ } else if (Tok.is(tok::l_paren)) {
+ // This is a function-like macro definition. Read the argument list.
+ MI->setIsFunctionLike();
+ if (ReadMacroParameterList(MI, LastTok))
+ return nullptr;
+
+ // If this is a definition of an ISO C/C++ variadic function-like macro (not
+ // using the GNU named varargs extension) inform our variadic scope guard
+ // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
+ // allowed only within the definition of a variadic macro.
+
+ if (MI->isC99Varargs()) {
+ VariadicMacroScopeGuard.enterScope();
+ }
+
+ // Read the first token after the arg list for down below.
+ LexUnexpandedToken(Tok);
+ } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
+ // C99 requires whitespace between the macro definition and the body. Emit
+ // a diagnostic for something like "#define X+".
+ Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
+ } else {
+ // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
+ // first character of a replacement list is not a character required by
+ // subclause 5.2.1, then there shall be white-space separation between the
+ // identifier and the replacement list.". 5.2.1 lists this set:
+ // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
+ // is irrelevant here.
+ bool isInvalid = false;
+ if (Tok.is(tok::at)) // @ is not in the list above.
+ isInvalid = true;
+ else if (Tok.is(tok::unknown)) {
+ // If we have an unknown token, it is something strange like "`". Since
+ // all of valid characters would have lexed into a single character
+ // token of some sort, we know this is not a valid case.
+ isInvalid = true;
+ }
+ if (isInvalid)
+ Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
+ else
+ Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
+ }
+
+ if (!Tok.is(tok::eod))
+ LastTok = Tok;
+
+ // Read the rest of the macro body.
+ if (MI->isObjectLike()) {
+ // Object-like macros are very simple, just read their body.
+ while (Tok.isNot(tok::eod)) {
+ LastTok = Tok;
+ MI->AddTokenToBody(Tok);
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ }
+ } else {
+ // Otherwise, read the body of a function-like macro. While we are at it,
+ // check C99 6.10.3.2p1: ensure that # operators are followed by macro
+ // parameters in function-like macro expansions.
+
+ VAOptDefinitionContext VAOCtx(*this);
+
+ while (Tok.isNot(tok::eod)) {
+ LastTok = Tok;
+
+ if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
+ MI->AddTokenToBody(Tok);
+
+ if (VAOCtx.isVAOptToken(Tok)) {
+ // If we're already within a VAOPT, emit an error.
+ if (VAOCtx.isInVAOpt()) {
+ Diag(Tok, diag::err_pp_vaopt_nested_use);
+ return nullptr;
+ }
+ // Ensure VAOPT is followed by a '(' .
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
+ return nullptr;
+ }
+ MI->AddTokenToBody(Tok);
+ VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
+ LexUnexpandedToken(Tok);
+ if (Tok.is(tok::hashhash)) {
+ Diag(Tok, diag::err_vaopt_paste_at_start);
+ return nullptr;
+ }
+ continue;
+ } else if (VAOCtx.isInVAOpt()) {
+ if (Tok.is(tok::r_paren)) {
+ if (VAOCtx.sawClosingParen()) {
+ const unsigned NumTokens = MI->getNumTokens();
+ assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
+ "and a subsequent tok::r_paren");
+ if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
+ Diag(Tok, diag::err_vaopt_paste_at_end);
+ return nullptr;
+ }
+ }
+ } else if (Tok.is(tok::l_paren)) {
+ VAOCtx.sawOpeningParen(Tok.getLocation());
+ }
+ }
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ continue;
+ }
+
+ // If we're in -traditional mode, then we should ignore stringification
+ // and token pasting. Mark the tokens as unknown so as not to confuse
+ // things.
+ if (getLangOpts().TraditionalCPP) {
+ Tok.setKind(tok::unknown);
+ MI->AddTokenToBody(Tok);
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ continue;
+ }
+
+ if (Tok.is(tok::hashhash)) {
+ // If we see token pasting, check if it looks like the gcc comma
+ // pasting extension. We'll use this information to suppress
+ // diagnostics later on.
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+
+ if (Tok.is(tok::eod)) {
+ MI->AddTokenToBody(LastTok);
+ break;
+ }
+
+ unsigned NumTokens = MI->getNumTokens();
+ if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+ MI->getReplacementToken(NumTokens-1).is(tok::comma))
+ MI->setHasCommaPasting();
+
+ // Things look ok, add the '##' token to the macro.
+ MI->AddTokenToBody(LastTok);
+ continue;
+ }
+
+ // Our Token is a stringization operator.
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+
+ // Check for a valid macro arg identifier or __VA_OPT__.
+ if (!VAOCtx.isVAOptToken(Tok) &&
+ (Tok.getIdentifierInfo() == nullptr ||
+ MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
+
+ // If this is assembler-with-cpp mode, we accept random gibberish after
+ // the '#' because '#' is often a comment character. However, change
+ // the kind of the token to tok::unknown so that the preprocessor isn't
+ // confused.
+ if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
+ LastTok.setKind(tok::unknown);
+ MI->AddTokenToBody(LastTok);
+ continue;
+ } else {
+ Diag(Tok, diag::err_pp_stringize_not_parameter)
+ << LastTok.is(tok::hashat);
+ return nullptr;
+ }
+ }
+
+ // Things look ok, add the '#' and param name tokens to the macro.
+ MI->AddTokenToBody(LastTok);
+
+ // If the token following '#' is VAOPT, let the next iteration handle it
+ // and check it for correctness, otherwise add the token and prime the
+ // loop with the next one.
+ if (!VAOCtx.isVAOptToken(Tok)) {
+ MI->AddTokenToBody(Tok);
+ LastTok = Tok;
+
+ // Get the next token of the macro.
+ LexUnexpandedToken(Tok);
+ }
+ }
+ if (VAOCtx.isInVAOpt()) {
+ assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
+ Diag(Tok, diag::err_pp_expected_after)
+ << LastTok.getKind() << tok::r_paren;
+ Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
+ return nullptr;
+ }
+ }
+ MI->setDefinitionEndLoc(LastTok.getLocation());
+ return MI;
+}
+/// HandleDefineDirective - Implements \#define. This consumes the entire macro
+/// line then lets the caller lex the next real token.
+void Preprocessor::HandleDefineDirective(
+ Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
+ ++NumDefined;
+
+ Token MacroNameTok;
+ bool MacroShadowsKeyword;
+ ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod))
+ return;
+
+ // If we are supposed to keep comments in #defines, reenable comment saving
+ // mode.
+ if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
+
+ MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
+ MacroNameTok, ImmediatelyAfterHeaderGuard);
+
+ if (!MI) return;
+
+ if (MacroShadowsKeyword &&
+ !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
+ Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
+ }
+ // Check that there is no paste (##) operator at the beginning or end of the
+ // replacement list.
+ unsigned NumTokens = MI->getNumTokens();
+ if (NumTokens != 0) {
+ if (MI->getReplacementToken(0).is(tok::hashhash)) {
+ Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
+ return;
+ }
+ if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
+ Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
+ return;
+ }
+ }
+
+ // When skipping just warn about macros that do not match.
+ if (SkippingUntilPCHThroughHeader) {
+ const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+ if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
+ /*Syntactic=*/LangOpts.MicrosoftExt))
+ Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
+ << MacroNameTok.getIdentifierInfo();
+ return;
+ }
+
+ // Finally, if this identifier already had a macro defined for it, verify that
+ // the macro bodies are identical, and issue diagnostics if they are not.
+ if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+ // In Objective-C, ignore attempts to directly redefine the builtin
+ // definitions of the ownership qualifiers. It's still possible to
+ // #undef them.
+ auto isObjCProtectedMacro = [](const IdentifierInfo *II) -> bool {
+ return II->isStr("__strong") ||
+ II->isStr("__weak") ||
+ II->isStr("__unsafe_unretained") ||
+ II->isStr("__autoreleasing");
+ };
+ if (getLangOpts().ObjC &&
+ SourceMgr.getFileID(OtherMI->getDefinitionLoc())
+ == getPredefinesFileID() &&
+ isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
+ // Warn if it changes the tokens.
+ if ((!getDiagnostics().getSuppressSystemWarnings() ||
+ !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
+ !MI->isIdenticalTo(*OtherMI, *this,
+ /*Syntactic=*/LangOpts.MicrosoftExt)) {
+ Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
+ }
+ assert(!OtherMI->isWarnIfUnused());
+ return;
+ }
+
+ // It is very common for system headers to have tons of macro redefinitions
+ // and for warnings to be disabled in system headers. If this is the case,
+ // then don't bother calling MacroInfo::isIdenticalTo.
+ if (!getDiagnostics().getSuppressSystemWarnings() ||
+ !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
+ if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
+ Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+ // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
+ // C++ [cpp.predefined]p4, but allow it as an extension.
+ if (OtherMI->isBuiltinMacro())
+ Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
+ // Macros must be identical. This means all tokens and whitespace
+ // separation must be the same. C99 6.10.3p2.
+ else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
+ !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
+ Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
+ << MacroNameTok.getIdentifierInfo();
+ Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
+ }
+ }
+ if (OtherMI->isWarnIfUnused())
+ WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
+ }
+
+ DefMacroDirective *MD =
+ appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
+
+ assert(!MI->isUsed());
+ // If we need warning for not using the macro, add its location in the
+ // warn-because-unused-macro set. If it gets used it will be removed from set.
+ if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
+ !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
+ !MacroExpansionInDirectivesOverride) {
+ MI->setIsWarnIfUnused(true);
+ WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
+ }
+
+ // If the callbacks want to know, tell them about the macro definition.
+ if (Callbacks)
+ Callbacks->MacroDefined(MacroNameTok, MD);
+}
+
+/// HandleUndefDirective - Implements \#undef.
+///
+void Preprocessor::HandleUndefDirective() {
+ ++NumUndefined;
+
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok, MU_Undef);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod))
+ return;
+
+ // Check to see if this is the last token on the #undef line.
+ CheckEndOfDirective("undef");
+
+ // Okay, we have a valid identifier to undef.
+ auto *II = MacroNameTok.getIdentifierInfo();
+ auto MD = getMacroDefinition(II);
+ UndefMacroDirective *Undef = nullptr;
+
+ // If the macro is not defined, this is a noop undef.
+ if (const MacroInfo *MI = MD.getMacroInfo()) {
+ if (!MI->isUsed() && MI->isWarnIfUnused())
+ Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+ if (MI->isWarnIfUnused())
+ WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
+
+ Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
+ }
+
+ // If the callbacks want to know, tell them about the macro #undef.
+ // Note: no matter if the macro was defined or not.
+ if (Callbacks)
+ Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
+
+ if (Undef)
+ appendMacroDirective(II, Undef);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Conditional Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
+/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
+/// true if any tokens have been returned or pp-directives activated before this
+/// \#ifndef has been lexed.
+///
+void Preprocessor::HandleIfdefDirective(Token &Result,
+ const Token &HashToken,
+ bool isIfndef,
+ bool ReadAnyTokensBeforeDirective) {
+ ++NumIf;
+ Token DirectiveTok = Result;
+
+ Token MacroNameTok;
+ ReadMacroName(MacroNameTok);
+
+ // Error reading macro name? If so, diagnostic already issued.
+ if (MacroNameTok.is(tok::eod)) {
+ // Skip code until we get to #endif. This helps with recovery by not
+ // emitting an error when the #endif is reached.
+ SkipExcludedConditionalBlock(HashToken.getLocation(),
+ DirectiveTok.getLocation(),
+ /*Foundnonskip*/ false, /*FoundElse*/ false);
+ return;
+ }
+
+ // Check to see if this is the last token on the #if[n]def line.
+ CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
+
+ IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
+ auto MD = getMacroDefinition(MII);
+ MacroInfo *MI = MD.getMacroInfo();
+
+ if (CurPPLexer->getConditionalStackDepth() == 0) {
+ // If the start of a top-level #ifdef and if the macro is not defined,
+ // inform MIOpt that this might be the start of a proper include guard.
+ // Otherwise it is some other form of unknown conditional which we can't
+ // handle.
+ if (!ReadAnyTokensBeforeDirective && !MI) {
+ assert(isIfndef && "#ifdef shouldn't reach here");
+ CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
+ } else
+ CurPPLexer->MIOpt.EnterTopLevelConditional();
+ }
+
+ // If there is a macro, process it.
+ if (MI) // Mark it used.
+ markMacroAsUsed(MI);
+
+ if (Callbacks) {
+ if (isIfndef)
+ Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
+ else
+ Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
+ }
+
+ bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
+ getSourceManager().isInMainFile(DirectiveTok.getLocation());
+
+ // Should we include the stuff contained by this directive?
+ if (PPOpts->SingleFileParseMode && !MI) {
+ // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+ // the directive blocks.
+ CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
+ /*wasskip*/false, /*foundnonskip*/false,
+ /*foundelse*/false);
+ } else if (!MI == isIfndef || RetainExcludedCB) {
+ // Yes, remember that we are inside a conditional, then lex the next token.
+ CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
+ /*wasskip*/false, /*foundnonskip*/true,
+ /*foundelse*/false);
+ } else {
+ // No, skip the contents of this block.
+ SkipExcludedConditionalBlock(HashToken.getLocation(),
+ DirectiveTok.getLocation(),
+ /*Foundnonskip*/ false,
+ /*FoundElse*/ false);
+ }
+}
+
+/// HandleIfDirective - Implements the \#if directive.
+///
+void Preprocessor::HandleIfDirective(Token &IfToken,
+ const Token &HashToken,
+ bool ReadAnyTokensBeforeDirective) {
+ ++NumIf;
+
+ // Parse and evaluate the conditional expression.
+ IdentifierInfo *IfNDefMacro = nullptr;
+ const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
+ const bool ConditionalTrue = DER.Conditional;
+
+ // If this condition is equivalent to #ifndef X, and if this is the first
+ // directive seen, handle it for the multiple-include optimization.
+ if (CurPPLexer->getConditionalStackDepth() == 0) {
+ if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
+ // FIXME: Pass in the location of the macro name, not the 'if' token.
+ CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
+ else
+ CurPPLexer->MIOpt.EnterTopLevelConditional();
+ }
+
+ if (Callbacks)
+ Callbacks->If(
+ IfToken.getLocation(), DER.ExprRange,
+ (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
+
+ bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
+ getSourceManager().isInMainFile(IfToken.getLocation());
+
+ // Should we include the stuff contained by this directive?
+ if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
+ // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+ // the directive blocks.
+ CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
+ /*foundnonskip*/false, /*foundelse*/false);
+ } else if (ConditionalTrue || RetainExcludedCB) {
+ // Yes, remember that we are inside a conditional, then lex the next token.
+ CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
+ /*foundnonskip*/true, /*foundelse*/false);
+ } else {
+ // No, skip the contents of this block.
+ SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
+ /*Foundnonskip*/ false,
+ /*FoundElse*/ false);
+ }
+}
+
+/// HandleEndifDirective - Implements the \#endif directive.
+///
+void Preprocessor::HandleEndifDirective(Token &EndifToken) {
+ ++NumEndif;
+
+ // Check that this is the whole directive.
+ CheckEndOfDirective("endif");
+
+ PPConditionalInfo CondInfo;
+ if (CurPPLexer->popConditionalLevel(CondInfo)) {
+ // No conditionals on the stack: this is an #endif without an #if.
+ Diag(EndifToken, diag::err_pp_endif_without_if);
+ return;
+ }
+
+ // If this the end of a top-level #endif, inform MIOpt.
+ if (CurPPLexer->getConditionalStackDepth() == 0)
+ CurPPLexer->MIOpt.ExitTopLevelConditional();
+
+ assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
+ "This code should only be reachable in the non-skipping case!");
+
+ if (Callbacks)
+ Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
+}
+
+/// HandleElseDirective - Implements the \#else directive.
+///
+void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
+ ++NumElse;
+
+ // #else directive in a non-skipping conditional... start skipping.
+ CheckEndOfDirective("else");
+
+ PPConditionalInfo CI;
+ if (CurPPLexer->popConditionalLevel(CI)) {
+ Diag(Result, diag::pp_err_else_without_if);
+ return;
+ }
+
+ // If this is a top-level #else, inform the MIOpt.
+ if (CurPPLexer->getConditionalStackDepth() == 0)
+ CurPPLexer->MIOpt.EnterTopLevelConditional();
+
+ // If this is a #else with a #else before it, report the error.
+ if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
+
+ if (Callbacks)
+ Callbacks->Else(Result.getLocation(), CI.IfLoc);
+
+ bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
+ getSourceManager().isInMainFile(Result.getLocation());
+
+ if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
+ // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+ // the directive blocks.
+ CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
+ /*foundnonskip*/false, /*foundelse*/true);
+ return;
+ }
+
+ // Finally, skip the rest of the contents of this block.
+ SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
+ /*Foundnonskip*/ true,
+ /*FoundElse*/ true, Result.getLocation());
+}
+
+/// HandleElifDirective - Implements the \#elif directive.
+///
+void Preprocessor::HandleElifDirective(Token &ElifToken,
+ const Token &HashToken) {
+ ++NumElse;
+
+ // #elif directive in a non-skipping conditional... start skipping.
+ // We don't care what the condition is, because we will always skip it (since
+ // the block immediately before it was included).
+ SourceRange ConditionRange = DiscardUntilEndOfDirective();
+
+ PPConditionalInfo CI;
+ if (CurPPLexer->popConditionalLevel(CI)) {
+ Diag(ElifToken, diag::pp_err_elif_without_if);
+ return;
+ }
+
+ // If this is a top-level #elif, inform the MIOpt.
+ if (CurPPLexer->getConditionalStackDepth() == 0)
+ CurPPLexer->MIOpt.EnterTopLevelConditional();
+
+ // If this is a #elif with a #else before it, report the error.
+ if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
+
+ if (Callbacks)
+ Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
+ PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
+
+ bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
+ getSourceManager().isInMainFile(ElifToken.getLocation());
+
+ if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
+ // In 'single-file-parse mode' undefined identifiers trigger parsing of all
+ // the directive blocks.
+ CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
+ /*foundnonskip*/false, /*foundelse*/false);
+ return;
+ }
+
+ // Finally, skip the rest of the contents of this block.
+ SkipExcludedConditionalBlock(
+ HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
+ /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
+}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
new file mode 100644
index 000000000000..e5ec2b99f507
--- /dev/null
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -0,0 +1,899 @@
+//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor::EvaluateDirectiveExpression method,
+// which parses and evaluates integer constant expressions for #if directives.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME: implement testing for #assert's.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <cassert>
+
+using namespace clang;
+
+namespace {
+
+/// PPValue - Represents the value of a subexpression of a preprocessor
+/// conditional and the source range covered by it.
+class PPValue {
+ SourceRange Range;
+ IdentifierInfo *II;
+
+public:
+ llvm::APSInt Val;
+
+ // Default ctor - Construct an 'invalid' PPValue.
+ PPValue(unsigned BitWidth) : Val(BitWidth) {}
+
+ // If this value was produced by directly evaluating an identifier, produce
+ // that identifier.
+ IdentifierInfo *getIdentifier() const { return II; }
+ void setIdentifier(IdentifierInfo *II) { this->II = II; }
+
+ unsigned getBitWidth() const { return Val.getBitWidth(); }
+ bool isUnsigned() const { return Val.isUnsigned(); }
+
+ SourceRange getRange() const { return Range; }
+
+ void setRange(SourceLocation L) { Range.setBegin(L); Range.setEnd(L); }
+ void setRange(SourceLocation B, SourceLocation E) {
+ Range.setBegin(B); Range.setEnd(E);
+ }
+ void setBegin(SourceLocation L) { Range.setBegin(L); }
+ void setEnd(SourceLocation L) { Range.setEnd(L); }
+};
+
+} // end anonymous namespace
+
+static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
+ Token &PeekTok, bool ValueLive,
+ bool &IncludedUndefinedIds,
+ Preprocessor &PP);
+
+/// DefinedTracker - This struct is used while parsing expressions to keep track
+/// of whether !defined(X) has been seen.
+///
+/// With this simple scheme, we handle the basic forms:
+/// !defined(X) and !defined X
+/// but we also trivially handle (silly) stuff like:
+/// !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)).
+struct DefinedTracker {
+ /// Each time a Value is evaluated, it returns information about whether the
+ /// parsed value is of the form defined(X), !defined(X) or is something else.
+ enum TrackerState {
+ DefinedMacro, // defined(X)
+ NotDefinedMacro, // !defined(X)
+ Unknown // Something else.
+ } State;
+ /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this
+ /// indicates the macro that was checked.
+ IdentifierInfo *TheMacro;
+ bool IncludedUndefinedIds = false;
+};
+
+/// EvaluateDefined - Process a 'defined(sym)' expression.
+static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
+ bool ValueLive, Preprocessor &PP) {
+ SourceLocation beginLoc(PeekTok.getLocation());
+ Result.setBegin(beginLoc);
+
+ // Get the next token, don't expand it.
+ PP.LexUnexpandedNonComment(PeekTok);
+
+ // Two options, it can either be a pp-identifier or a (.
+ SourceLocation LParenLoc;
+ if (PeekTok.is(tok::l_paren)) {
+ // Found a paren, remember we saw it and skip it.
+ LParenLoc = PeekTok.getLocation();
+ PP.LexUnexpandedNonComment(PeekTok);
+ }
+
+ if (PeekTok.is(tok::code_completion)) {
+ if (PP.getCodeCompletionHandler())
+ PP.getCodeCompletionHandler()->CodeCompleteMacroName(false);
+ PP.setCodeCompletionReached();
+ PP.LexUnexpandedNonComment(PeekTok);
+ }
+
+ // If we don't have a pp-identifier now, this is an error.
+ if (PP.CheckMacroName(PeekTok, MU_Other))
+ return true;
+
+ // Otherwise, we got an identifier, is it defined to something?
+ IdentifierInfo *II = PeekTok.getIdentifierInfo();
+ MacroDefinition Macro = PP.getMacroDefinition(II);
+ Result.Val = !!Macro;
+ Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
+ DT.IncludedUndefinedIds = !Macro;
+
+ // If there is a macro, mark it used.
+ if (Result.Val != 0 && ValueLive)
+ PP.markMacroAsUsed(Macro.getMacroInfo());
+
+ // Save macro token for callback.
+ Token macroToken(PeekTok);
+
+ // If we are in parens, ensure we have a trailing ).
+ if (LParenLoc.isValid()) {
+ // Consume identifier.
+ Result.setEnd(PeekTok.getLocation());
+ PP.LexUnexpandedNonComment(PeekTok);
+
+ if (PeekTok.isNot(tok::r_paren)) {
+ PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_after)
+ << "'defined'" << tok::r_paren;
+ PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ return true;
+ }
+ // Consume the ).
+ PP.LexNonComment(PeekTok);
+ Result.setEnd(PeekTok.getLocation());
+ } else {
+ // Consume identifier.
+ Result.setEnd(PeekTok.getLocation());
+ PP.LexNonComment(PeekTok);
+ }
+
+ // [cpp.cond]p4:
+ // Prior to evaluation, macro invocations in the list of preprocessing
+ // tokens that will become the controlling constant expression are replaced
+ // (except for those macro names modified by the 'defined' unary operator),
+ // just as in normal text. If the token 'defined' is generated as a result
+ // of this replacement process or use of the 'defined' unary operator does
+ // not match one of the two specified forms prior to macro replacement, the
+ // behavior is undefined.
+ // This isn't an idle threat, consider this program:
+ // #define FOO
+ // #define BAR defined(FOO)
+ // #if BAR
+ // ...
+ // #else
+ // ...
+ // #endif
+ // clang and gcc will pick the #if branch while Visual Studio will take the
+ // #else branch. Emit a warning about this undefined behavior.
+ if (beginLoc.isMacroID()) {
+ bool IsFunctionTypeMacro =
+ PP.getSourceManager()
+ .getSLocEntry(PP.getSourceManager().getFileID(beginLoc))
+ .getExpansion()
+ .isFunctionMacroExpansion();
+ // For object-type macros, it's easy to replace
+ // #define FOO defined(BAR)
+ // with
+ // #if defined(BAR)
+ // #define FOO 1
+ // #else
+ // #define FOO 0
+ // #endif
+ // and doing so makes sense since compilers handle this differently in
+ // practice (see example further up). But for function-type macros,
+ // there is no good way to write
+ // # define FOO(x) (defined(M_ ## x) && M_ ## x)
+ // in a different way, and compilers seem to agree on how to behave here.
+ // So warn by default on object-type macros, but only warn in -pedantic
+ // mode on function-type macros.
+ if (IsFunctionTypeMacro)
+ PP.Diag(beginLoc, diag::warn_defined_in_function_type_macro);
+ else
+ PP.Diag(beginLoc, diag::warn_defined_in_object_type_macro);
+ }
+
+ // Invoke the 'defined' callback.
+ if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
+ Callbacks->Defined(macroToken, Macro,
+ SourceRange(beginLoc, PeekTok.getLocation()));
+ }
+
+ // Success, remember that we saw defined(X).
+ DT.State = DefinedTracker::DefinedMacro;
+ DT.TheMacro = II;
+ return false;
+}
+
+/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
+/// return the computed value in Result. Return true if there was an error
+/// parsing. This function also returns information about the form of the
+/// expression in DT. See above for information on what DT means.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used. As such, avoid diagnostics that relate to
+/// evaluation.
+static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
+ bool ValueLive, Preprocessor &PP) {
+ DT.State = DefinedTracker::Unknown;
+
+ Result.setIdentifier(nullptr);
+
+ if (PeekTok.is(tok::code_completion)) {
+ if (PP.getCodeCompletionHandler())
+ PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
+ PP.setCodeCompletionReached();
+ PP.LexNonComment(PeekTok);
+ }
+
+ switch (PeekTok.getKind()) {
+ default:
+ // If this token's spelling is a pp-identifier, check to see if it is
+ // 'defined' or if it is a macro. Note that we check here because many
+ // keywords are pp-identifiers, so we can't check the kind.
+ if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
+ // Handle "defined X" and "defined(X)".
+ if (II->isStr("defined"))
+ return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);
+
+ if (!II->isCPlusPlusOperatorKeyword()) {
+ // If this identifier isn't 'defined' or one of the special
+ // preprocessor keywords and it wasn't macro expanded, it turns
+ // into a simple 0
+ if (ValueLive)
+ PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
+ Result.Val = 0;
+ Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
+ Result.setIdentifier(II);
+ Result.setRange(PeekTok.getLocation());
+ DT.IncludedUndefinedIds = true;
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+ }
+ PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
+ return true;
+ case tok::eod:
+ case tok::r_paren:
+ // If there is no expression, report and exit.
+ PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
+ return true;
+ case tok::numeric_constant: {
+ SmallString<64> IntegerBuffer;
+ bool NumberInvalid = false;
+ StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
+ &NumberInvalid);
+ if (NumberInvalid)
+ return true; // a diagnostic was already reported
+
+ NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP);
+ if (Literal.hadError)
+ return true; // a diagnostic was already reported.
+
+ if (Literal.isFloatingLiteral() || Literal.isImaginary) {
+ PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
+ return true;
+ }
+ assert(Literal.isIntegerLiteral() && "Unknown ppnumber");
+
+ // Complain about, and drop, any ud-suffix.
+ if (Literal.hasUDSuffix())
+ PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1;
+
+ // 'long long' is a C99 or C++11 feature.
+ if (!PP.getLangOpts().C99 && Literal.isLongLong) {
+ if (PP.getLangOpts().CPlusPlus)
+ PP.Diag(PeekTok,
+ PP.getLangOpts().CPlusPlus11 ?
+ diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
+ else
+ PP.Diag(PeekTok, diag::ext_c99_longlong);
+ }
+
+ // Parse the integer literal into Result.
+ if (Literal.GetIntegerValue(Result.Val)) {
+ // Overflow parsing integer literal.
+ if (ValueLive)
+ PP.Diag(PeekTok, diag::err_integer_literal_too_large)
+ << /* Unsigned */ 1;
+ Result.Val.setIsUnsigned(true);
+ } else {
+ // Set the signedness of the result to match whether there was a U suffix
+ // or not.
+ Result.Val.setIsUnsigned(Literal.isUnsigned);
+
+ // Detect overflow based on whether the value is signed. If signed
+ // and if the value is too large, emit a warning "integer constant is so
+ // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
+ // is 64-bits.
+ if (!Literal.isUnsigned && Result.Val.isNegative()) {
+ // Octal, hexadecimal, and binary literals are implicitly unsigned if
+ // the value does not fit into a signed integer type.
+ if (ValueLive && Literal.getRadix() == 10)
+ PP.Diag(PeekTok, diag::ext_integer_literal_too_large_for_signed);
+ Result.Val.setIsUnsigned(true);
+ }
+ }
+
+ // Consume the token.
+ Result.setRange(PeekTok.getLocation());
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+ case tok::char_constant: // 'x'
+ case tok::wide_char_constant: // L'x'
+ case tok::utf8_char_constant: // u8'x'
+ case tok::utf16_char_constant: // u'x'
+ case tok::utf32_char_constant: { // U'x'
+ // Complain about, and drop, any ud-suffix.
+ if (PeekTok.hasUDSuffix())
+ PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;
+
+ SmallString<32> CharBuffer;
+ bool CharInvalid = false;
+ StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+ if (CharInvalid)
+ return true;
+
+ CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
+ PeekTok.getLocation(), PP, PeekTok.getKind());
+ if (Literal.hadError())
+ return true; // A diagnostic was already emitted.
+
+ // Character literals are always int or wchar_t, expand to intmax_t.
+ const TargetInfo &TI = PP.getTargetInfo();
+ unsigned NumBits;
+ if (Literal.isMultiChar())
+ NumBits = TI.getIntWidth();
+ else if (Literal.isWide())
+ NumBits = TI.getWCharWidth();
+ else if (Literal.isUTF16())
+ NumBits = TI.getChar16Width();
+ else if (Literal.isUTF32())
+ NumBits = TI.getChar32Width();
+ else // char or char8_t
+ NumBits = TI.getCharWidth();
+
+ // Set the width.
+ llvm::APSInt Val(NumBits);
+ // Set the value.
+ Val = Literal.getValue();
+ // Set the signedness. UTF-16 and UTF-32 are always unsigned
+ if (Literal.isWide())
+ Val.setIsUnsigned(!TargetInfo::isTypeSigned(TI.getWCharType()));
+ else if (!Literal.isUTF16() && !Literal.isUTF32())
+ Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);
+
+ if (Result.Val.getBitWidth() > Val.getBitWidth()) {
+ Result.Val = Val.extend(Result.Val.getBitWidth());
+ } else {
+ assert(Result.Val.getBitWidth() == Val.getBitWidth() &&
+ "intmax_t smaller than char/wchar_t?");
+ Result.Val = Val;
+ }
+
+ // Consume the token.
+ Result.setRange(PeekTok.getLocation());
+ PP.LexNonComment(PeekTok);
+ return false;
+ }
+ case tok::l_paren: {
+ SourceLocation Start = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok); // Eat the (.
+ // Parse the value and if there are any binary operators involved, parse
+ // them.
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+
+ // If this is a silly value like (X), which doesn't need parens, check for
+ // !(defined X).
+ if (PeekTok.is(tok::r_paren)) {
+ // Just use DT unmodified as our result.
+ } else {
+ // Otherwise, we have something like (x+y), and we consumed '(x'.
+ if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive,
+ DT.IncludedUndefinedIds, PP))
+ return true;
+
+ if (PeekTok.isNot(tok::r_paren)) {
+ PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen)
+ << Result.getRange();
+ PP.Diag(Start, diag::note_matching) << tok::l_paren;
+ return true;
+ }
+ DT.State = DefinedTracker::Unknown;
+ }
+ Result.setRange(Start, PeekTok.getLocation());
+ Result.setIdentifier(nullptr);
+ PP.LexNonComment(PeekTok); // Eat the ).
+ return false;
+ }
+ case tok::plus: {
+ SourceLocation Start = PeekTok.getLocation();
+ // Unary plus doesn't modify the value.
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ Result.setBegin(Start);
+ Result.setIdentifier(nullptr);
+ return false;
+ }
+ case tok::minus: {
+ SourceLocation Loc = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ Result.setBegin(Loc);
+ Result.setIdentifier(nullptr);
+
+ // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
+ Result.Val = -Result.Val;
+
+ // -MININT is the only thing that overflows. Unsigned never overflows.
+ bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue();
+
+ // If this operator is live and overflowed, report the issue.
+ if (Overflow && ValueLive)
+ PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange();
+
+ DT.State = DefinedTracker::Unknown;
+ return false;
+ }
+
+ case tok::tilde: {
+ SourceLocation Start = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ Result.setBegin(Start);
+ Result.setIdentifier(nullptr);
+
+ // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
+ Result.Val = ~Result.Val;
+ DT.State = DefinedTracker::Unknown;
+ return false;
+ }
+
+ case tok::exclaim: {
+ SourceLocation Start = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok);
+ if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+ Result.setBegin(Start);
+ Result.Val = !Result.Val;
+ // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
+ Result.Val.setIsUnsigned(false);
+ Result.setIdentifier(nullptr);
+
+ if (DT.State == DefinedTracker::DefinedMacro)
+ DT.State = DefinedTracker::NotDefinedMacro;
+ else if (DT.State == DefinedTracker::NotDefinedMacro)
+ DT.State = DefinedTracker::DefinedMacro;
+ return false;
+ }
+ case tok::kw_true:
+ case tok::kw_false:
+ Result.Val = PeekTok.getKind() == tok::kw_true;
+ Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
+ Result.setIdentifier(PeekTok.getIdentifierInfo());
+ Result.setRange(PeekTok.getLocation());
+ PP.LexNonComment(PeekTok);
+ return false;
+
+ // FIXME: Handle #assert
+ }
+}
+
+/// getPrecedence - Return the precedence of the specified binary operator
+/// token. This returns:
+/// ~0 - Invalid token.
+/// 14 -> 3 - various operators.
+/// 0 - 'eod' or ')'
+static unsigned getPrecedence(tok::TokenKind Kind) {
+ switch (Kind) {
+ default: return ~0U;
+ case tok::percent:
+ case tok::slash:
+ case tok::star: return 14;
+ case tok::plus:
+ case tok::minus: return 13;
+ case tok::lessless:
+ case tok::greatergreater: return 12;
+ case tok::lessequal:
+ case tok::less:
+ case tok::greaterequal:
+ case tok::greater: return 11;
+ case tok::exclaimequal:
+ case tok::equalequal: return 10;
+ case tok::amp: return 9;
+ case tok::caret: return 8;
+ case tok::pipe: return 7;
+ case tok::ampamp: return 6;
+ case tok::pipepipe: return 5;
+ case tok::question: return 4;
+ case tok::comma: return 3;
+ case tok::colon: return 2;
+ case tok::r_paren: return 0;// Lowest priority, end of expr.
+ case tok::eod: return 0;// Lowest priority, end of directive.
+ }
+}
+
+static void diagnoseUnexpectedOperator(Preprocessor &PP, PPValue &LHS,
+ Token &Tok) {
+ if (Tok.is(tok::l_paren) && LHS.getIdentifier())
+ PP.Diag(LHS.getRange().getBegin(), diag::err_pp_expr_bad_token_lparen)
+ << LHS.getIdentifier();
+ else
+ PP.Diag(Tok.getLocation(), diag::err_pp_expr_bad_token_binop)
+ << LHS.getRange();
+}
+
+/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
+/// PeekTok, and whose precedence is PeekPrec. This returns the result in LHS.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used. As such, avoid diagnostics that relate to
+/// evaluation, such as division by zero warnings.
+static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
+ Token &PeekTok, bool ValueLive,
+ bool &IncludedUndefinedIds,
+ Preprocessor &PP) {
+ unsigned PeekPrec = getPrecedence(PeekTok.getKind());
+ // If this token isn't valid, report the error.
+ if (PeekPrec == ~0U) {
+ diagnoseUnexpectedOperator(PP, LHS, PeekTok);
+ return true;
+ }
+
+ while (true) {
+ // If this token has a lower precedence than we are allowed to parse, return
+ // it so that higher levels of the recursion can parse it.
+ if (PeekPrec < MinPrec)
+ return false;
+
+ tok::TokenKind Operator = PeekTok.getKind();
+
+ // If this is a short-circuiting operator, see if the RHS of the operator is
+ // dead. Note that this cannot just clobber ValueLive. Consider
+ // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". In
+ // this example, the RHS of the && being dead does not make the rest of the
+ // expr dead.
+ bool RHSIsLive;
+ if (Operator == tok::ampamp && LHS.Val == 0)
+ RHSIsLive = false; // RHS of "0 && x" is dead.
+ else if (Operator == tok::pipepipe && LHS.Val != 0)
+ RHSIsLive = false; // RHS of "1 || x" is dead.
+ else if (Operator == tok::question && LHS.Val == 0)
+ RHSIsLive = false; // RHS (x) of "0 ? x : y" is dead.
+ else
+ RHSIsLive = ValueLive;
+
+ // Consume the operator, remembering the operator's location for reporting.
+ SourceLocation OpLoc = PeekTok.getLocation();
+ PP.LexNonComment(PeekTok);
+
+ PPValue RHS(LHS.getBitWidth());
+ // Parse the RHS of the operator.
+ DefinedTracker DT;
+ if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
+ IncludedUndefinedIds = DT.IncludedUndefinedIds;
+
+ // Remember the precedence of this operator and get the precedence of the
+ // operator immediately to the right of the RHS.
+ unsigned ThisPrec = PeekPrec;
+ PeekPrec = getPrecedence(PeekTok.getKind());
+
+ // If this token isn't valid, report the error.
+ if (PeekPrec == ~0U) {
+ diagnoseUnexpectedOperator(PP, RHS, PeekTok);
+ return true;
+ }
+
+ // Decide whether to include the next binop in this subexpression. For
+ // example, when parsing x+y*z and looking at '*', we want to recursively
+ // handle y*z as a single subexpression. We do this because the precedence
+ // of * is higher than that of +. The only strange case we have to handle
+ // here is for the ?: operator, where the precedence is actually lower than
+ // the LHS of the '?'. The grammar rule is:
+ //
+ // conditional-expression ::=
+ // logical-OR-expression ? expression : conditional-expression
+ // where 'expression' is actually comma-expression.
+ unsigned RHSPrec;
+ if (Operator == tok::question)
+ // The RHS of "?" should be maximally consumed as an expression.
+ RHSPrec = getPrecedence(tok::comma);
+ else // All others should munch while higher precedence.
+ RHSPrec = ThisPrec+1;
+
+ if (PeekPrec >= RHSPrec) {
+ if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive,
+ IncludedUndefinedIds, PP))
+ return true;
+ PeekPrec = getPrecedence(PeekTok.getKind());
+ }
+ assert(PeekPrec <= ThisPrec && "Recursion didn't work!");
+
+ // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+ // either operand is unsigned.
+ llvm::APSInt Res(LHS.getBitWidth());
+ switch (Operator) {
+ case tok::question: // No UAC for x and y in "x ? y : z".
+ case tok::lessless: // Shift amount doesn't UAC with shift value.
+ case tok::greatergreater: // Shift amount doesn't UAC with shift value.
+ case tok::comma: // Comma operands are not subject to UACs.
+ case tok::pipepipe: // Logical || does not do UACs.
+ case tok::ampamp: // Logical && does not do UACs.
+ break; // No UAC
+ default:
+ Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned());
+ // If this just promoted something from signed to unsigned, and if the
+ // value was negative, warn about it.
+ if (ValueLive && Res.isUnsigned()) {
+ if (!LHS.isUnsigned() && LHS.Val.isNegative())
+ PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 0
+ << LHS.Val.toString(10, true) + " to " +
+ LHS.Val.toString(10, false)
+ << LHS.getRange() << RHS.getRange();
+ if (!RHS.isUnsigned() && RHS.Val.isNegative())
+ PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 1
+ << RHS.Val.toString(10, true) + " to " +
+ RHS.Val.toString(10, false)
+ << LHS.getRange() << RHS.getRange();
+ }
+ LHS.Val.setIsUnsigned(Res.isUnsigned());
+ RHS.Val.setIsUnsigned(Res.isUnsigned());
+ }
+
+ bool Overflow = false;
+ switch (Operator) {
+ default: llvm_unreachable("Unknown operator token!");
+ case tok::percent:
+ if (RHS.Val != 0)
+ Res = LHS.Val % RHS.Val;
+ else if (ValueLive) {
+ PP.Diag(OpLoc, diag::err_pp_remainder_by_zero)
+ << LHS.getRange() << RHS.getRange();
+ return true;
+ }
+ break;
+ case tok::slash:
+ if (RHS.Val != 0) {
+ if (LHS.Val.isSigned())
+ Res = llvm::APSInt(LHS.Val.sdiv_ov(RHS.Val, Overflow), false);
+ else
+ Res = LHS.Val / RHS.Val;
+ } else if (ValueLive) {
+ PP.Diag(OpLoc, diag::err_pp_division_by_zero)
+ << LHS.getRange() << RHS.getRange();
+ return true;
+ }
+ break;
+
+ case tok::star:
+ if (Res.isSigned())
+ Res = llvm::APSInt(LHS.Val.smul_ov(RHS.Val, Overflow), false);
+ else
+ Res = LHS.Val * RHS.Val;
+ break;
+ case tok::lessless: {
+ // Determine whether overflow is about to happen.
+ if (LHS.isUnsigned())
+ Res = LHS.Val.ushl_ov(RHS.Val, Overflow);
+ else
+ Res = llvm::APSInt(LHS.Val.sshl_ov(RHS.Val, Overflow), false);
+ break;
+ }
+ case tok::greatergreater: {
+ // Determine whether overflow is about to happen.
+ unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
+ if (ShAmt >= LHS.getBitWidth()) {
+ Overflow = true;
+ ShAmt = LHS.getBitWidth()-1;
+ }
+ Res = LHS.Val >> ShAmt;
+ break;
+ }
+ case tok::plus:
+ if (LHS.isUnsigned())
+ Res = LHS.Val + RHS.Val;
+ else
+ Res = llvm::APSInt(LHS.Val.sadd_ov(RHS.Val, Overflow), false);
+ break;
+ case tok::minus:
+ if (LHS.isUnsigned())
+ Res = LHS.Val - RHS.Val;
+ else
+ Res = llvm::APSInt(LHS.Val.ssub_ov(RHS.Val, Overflow), false);
+ break;
+ case tok::lessequal:
+ Res = LHS.Val <= RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::less:
+ Res = LHS.Val < RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::greaterequal:
+ Res = LHS.Val >= RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::greater:
+ Res = LHS.Val > RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed)
+ break;
+ case tok::exclaimequal:
+ Res = LHS.Val != RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed)
+ break;
+ case tok::equalequal:
+ Res = LHS.Val == RHS.Val;
+ Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed)
+ break;
+ case tok::amp:
+ Res = LHS.Val & RHS.Val;
+ break;
+ case tok::caret:
+ Res = LHS.Val ^ RHS.Val;
+ break;
+ case tok::pipe:
+ Res = LHS.Val | RHS.Val;
+ break;
+ case tok::ampamp:
+ Res = (LHS.Val != 0 && RHS.Val != 0);
+ Res.setIsUnsigned(false); // C99 6.5.13p3, result is always int (signed)
+ break;
+ case tok::pipepipe:
+ Res = (LHS.Val != 0 || RHS.Val != 0);
+ Res.setIsUnsigned(false); // C99 6.5.14p3, result is always int (signed)
+ break;
+ case tok::comma:
+ // Comma is invalid in pp expressions in c89/c++ mode, but is valid in C99
+ // if not being evaluated.
+ if (!PP.getLangOpts().C99 || ValueLive)
+ PP.Diag(OpLoc, diag::ext_pp_comma_expr)
+ << LHS.getRange() << RHS.getRange();
+ Res = RHS.Val; // LHS = LHS,RHS -> RHS.
+ break;
+ case tok::question: {
+ // Parse the : part of the expression.
+ if (PeekTok.isNot(tok::colon)) {
+ PP.Diag(PeekTok.getLocation(), diag::err_expected)
+ << tok::colon << LHS.getRange() << RHS.getRange();
+ PP.Diag(OpLoc, diag::note_matching) << tok::question;
+ return true;
+ }
+ // Consume the :.
+ PP.LexNonComment(PeekTok);
+
+ // Evaluate the value after the :.
+ bool AfterColonLive = ValueLive && LHS.Val == 0;
+ PPValue AfterColonVal(LHS.getBitWidth());
+ DefinedTracker DT;
+ if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP))
+ return true;
+
+ // Parse anything after the : with the same precedence as ?. We allow
+ // things of equal precedence because ?: is right associative.
+ if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec,
+ PeekTok, AfterColonLive,
+ IncludedUndefinedIds, PP))
+ return true;
+
+ // Now that we have the condition, the LHS and the RHS of the :, evaluate.
+ Res = LHS.Val != 0 ? RHS.Val : AfterColonVal.Val;
+ RHS.setEnd(AfterColonVal.getRange().getEnd());
+
+ // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+ // either operand is unsigned.
+ Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned());
+
+ // Figure out the precedence of the token after the : part.
+ PeekPrec = getPrecedence(PeekTok.getKind());
+ break;
+ }
+ case tok::colon:
+ // Don't allow :'s to float around without being part of ?: exprs.
+ PP.Diag(OpLoc, diag::err_pp_colon_without_question)
+ << LHS.getRange() << RHS.getRange();
+ return true;
+ }
+
+ // If this operator is live and overflowed, report the issue.
+ if (Overflow && ValueLive)
+ PP.Diag(OpLoc, diag::warn_pp_expr_overflow)
+ << LHS.getRange() << RHS.getRange();
+
+ // Put the result back into 'LHS' for our next iteration.
+ LHS.Val = Res;
+ LHS.setEnd(RHS.getRange().getEnd());
+ RHS.setIdentifier(nullptr);
+ }
+}
+
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive. If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.
+Preprocessor::DirectiveEvalResult
+Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+ SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
+ // Save the current state of 'DisableMacroExpansion' and reset it to false. If
+ // 'DisableMacroExpansion' is true, then we must be in a macro argument list
+ // in which case a directive is undefined behavior. We want macros to be able
+ // to recursively expand in order to get more gcc-list behavior, so we force
+ // DisableMacroExpansion to false and restore it when we're done parsing the
+ // expression.
+ bool DisableMacroExpansionAtStartOfDirective = DisableMacroExpansion;
+ DisableMacroExpansion = false;
+
+ // Peek ahead one token.
+ Token Tok;
+ LexNonComment(Tok);
+
+ // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
+ unsigned BitWidth = getTargetInfo().getIntMaxTWidth();
+
+ PPValue ResVal(BitWidth);
+ DefinedTracker DT;
+ SourceLocation ExprStartLoc = SourceMgr.getExpansionLoc(Tok.getLocation());
+ if (EvaluateValue(ResVal, Tok, DT, true, *this)) {
+ // Parse error, skip the rest of the macro line.
+ SourceRange ConditionRange = ExprStartLoc;
+ if (Tok.isNot(tok::eod))
+ ConditionRange = DiscardUntilEndOfDirective();
+
+ // Restore 'DisableMacroExpansion'.
+ DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+
+ // We cannot trust the source range from the value because there was a
+ // parse error. Track the range manually -- the end of the directive is the
+ // end of the condition range.
+ return {false,
+ DT.IncludedUndefinedIds,
+ {ExprStartLoc, ConditionRange.getEnd()}};
+ }
+
+ // If we are at the end of the expression after just parsing a value, there
+ // must be no (unparenthesized) binary operators involved, so we can exit
+ // directly.
+ if (Tok.is(tok::eod)) {
+ // If the expression we parsed was of the form !defined(macro), return the
+ // macro in IfNDefMacro.
+ if (DT.State == DefinedTracker::NotDefinedMacro)
+ IfNDefMacro = DT.TheMacro;
+
+ // Restore 'DisableMacroExpansion'.
+ DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+ return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+ }
+
+ // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
+ // operator and the stuff after it.
+ if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question),
+ Tok, true, DT.IncludedUndefinedIds, *this)) {
+ // Parse error, skip the rest of the macro line.
+ if (Tok.isNot(tok::eod))
+ DiscardUntilEndOfDirective();
+
+ // Restore 'DisableMacroExpansion'.
+ DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+ return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+ }
+
+ // If we aren't at the tok::eod token, something bad happened, like an extra
+ // ')' token.
+ if (Tok.isNot(tok::eod)) {
+ Diag(Tok, diag::err_pp_expected_eol);
+ DiscardUntilEndOfDirective();
+ }
+
+ // Restore 'DisableMacroExpansion'.
+ DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+ return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+}
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
new file mode 100644
index 000000000000..802172693960
--- /dev/null
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -0,0 +1,834 @@
+//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// current lexer stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroInfo.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+using namespace clang;
+
+PPCallbacks::~PPCallbacks() {}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Methods.
+//===----------------------------------------------------------------------===//
+
+/// isInPrimaryFile - Return true if we're in the top-level file, not in a
+/// \#include. This looks through macro expansions and active _Pragma lexers.
+bool Preprocessor::isInPrimaryFile() const {
+ if (IsFileLexer())
+ return IncludeMacroStack.empty();
+
+ // If there are any stacked lexers, we're in a #include.
+ assert(IsFileLexer(IncludeMacroStack[0]) &&
+ "Top level include stack isn't our primary lexer?");
+ return std::none_of(
+ IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
+ [&](const IncludeStackInfo &ISI) -> bool { return IsFileLexer(ISI); });
+}
+
+/// getCurrentLexer - Return the current file lexer being lexed from. Note
+/// that this ignores any potentially active macro expansions and _Pragma
+/// expansions going on at the time.
+PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
+ if (IsFileLexer())
+ return CurPPLexer;
+
+ // Look for a stacked lexer.
+ for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
+ if (IsFileLexer(ISI))
+ return ISI.ThePPLexer;
+ }
+ return nullptr;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.
+bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
+ SourceLocation Loc) {
+ assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
+ ++NumEnteredSourceFiles;
+
+ if (MaxIncludeStackDepth < IncludeMacroStack.size())
+ MaxIncludeStackDepth = IncludeMacroStack.size();
+
+ // Get the MemoryBuffer for this FID, if it fails, we fail.
+ bool Invalid = false;
+ const llvm::MemoryBuffer *InputFile =
+ getSourceManager().getBuffer(FID, Loc, &Invalid);
+ if (Invalid) {
+ SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
+ Diag(Loc, diag::err_pp_error_opening_file)
+ << std::string(SourceMgr.getBufferName(FileStart)) << "";
+ return true;
+ }
+
+ if (isCodeCompletionEnabled() &&
+ SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
+ CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
+ CodeCompletionLoc =
+ CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
+ }
+
+ EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
+ return false;
+}
+
+/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
+/// and start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
+ const DirectoryLookup *CurDir) {
+
+ // Add the current lexer to the include stack.
+ if (CurPPLexer || CurTokenLexer)
+ PushIncludeMacroStack();
+
+ CurLexer.reset(TheLexer);
+ CurPPLexer = TheLexer;
+ CurDirLookup = CurDir;
+ CurLexerSubmodule = nullptr;
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_Lexer;
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks && !CurLexer->Is_PragmaLexer) {
+ SrcMgr::CharacteristicKind FileType =
+ SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
+
+ Callbacks->FileChanged(CurLexer->getFileLoc(),
+ PPCallbacks::EnterFile, FileType);
+ }
+}
+
+/// EnterMacro - Add a Macro to the top of the include stack and start lexing
+/// tokens from it instead of the current buffer.
+void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
+ MacroInfo *Macro, MacroArgs *Args) {
+ std::unique_ptr<TokenLexer> TokLexer;
+ if (NumCachedTokenLexers == 0) {
+ TokLexer = std::make_unique<TokenLexer>(Tok, ILEnd, Macro, Args, *this);
+ } else {
+ TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
+ TokLexer->Init(Tok, ILEnd, Macro, Args);
+ }
+
+ PushIncludeMacroStack();
+ CurDirLookup = nullptr;
+ CurTokenLexer = std::move(TokLexer);
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_TokenLexer;
+}
+
+/// EnterTokenStream - Add a "macro" context to the top of the include stack,
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion. Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+ bool DisableMacroExpansion, bool OwnsTokens,
+ bool IsReinject) {
+ if (CurLexerKind == CLK_CachingLexer) {
+ if (CachedLexPos < CachedTokens.size()) {
+ assert(IsReinject && "new tokens in the middle of cached stream");
+ // We're entering tokens into the middle of our cached token stream. We
+ // can't represent that, so just insert the tokens into the buffer.
+ CachedTokens.insert(CachedTokens.begin() + CachedLexPos,
+ Toks, Toks + NumToks);
+ if (OwnsTokens)
+ delete [] Toks;
+ return;
+ }
+
+ // New tokens are at the end of the cached token sequnece; insert the
+ // token stream underneath the caching lexer.
+ ExitCachingLexMode();
+ EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
+ IsReinject);
+ EnterCachingLexMode();
+ return;
+ }
+
+ // Create a macro expander to expand from the specified token stream.
+ std::unique_ptr<TokenLexer> TokLexer;
+ if (NumCachedTokenLexers == 0) {
+ TokLexer = std::make_unique<TokenLexer>(
+ Toks, NumToks, DisableMacroExpansion, OwnsTokens, IsReinject, *this);
+ } else {
+ TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
+ TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
+ IsReinject);
+ }
+
+ // Save our current state.
+ PushIncludeMacroStack();
+ CurDirLookup = nullptr;
+ CurTokenLexer = std::move(TokLexer);
+ if (CurLexerKind != CLK_LexAfterModuleImport)
+ CurLexerKind = CLK_TokenLexer;
+}
+
+/// Compute the relative path that names the given file relative to
+/// the given directory.
+static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
+ const FileEntry *File,
+ SmallString<128> &Result) {
+ Result.clear();
+
+ StringRef FilePath = File->getDir()->getName();
+ StringRef Path = FilePath;
+ while (!Path.empty()) {
+ if (auto CurDir = FM.getDirectory(Path)) {
+ if (*CurDir == Dir) {
+ Result = FilePath.substr(Path.size());
+ llvm::sys::path::append(Result,
+ llvm::sys::path::filename(File->getName()));
+ return;
+ }
+ }
+
+ Path = llvm::sys::path::parent_path(Path);
+ }
+
+ Result = File->getName();
+}
+
+void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+ if (CurTokenLexer) {
+ CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
+ return;
+ }
+ if (CurLexer) {
+ CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
+ return;
+ }
+ // FIXME: Handle other kinds of lexers? It generally shouldn't matter,
+ // but it might if they're empty?
+}
+
+/// Determine the location to use as the end of the buffer for a lexer.
+///
+/// If the file ends with a newline, form the EOF token on the newline itself,
+/// rather than "on the line following it", which doesn't exist. This makes
+/// diagnostics relating to the end of file include the last file that the user
+/// actually typed, which is goodness.
+const char *Preprocessor::getCurLexerEndPos() {
+ const char *EndPos = CurLexer->BufferEnd;
+ if (EndPos != CurLexer->BufferStart &&
+ (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
+ --EndPos;
+
+ // Handle \n\r and \r\n:
+ if (EndPos != CurLexer->BufferStart &&
+ (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
+ EndPos[-1] != EndPos[0])
+ --EndPos;
+ }
+
+ return EndPos;
+}
+
+static void collectAllSubModulesWithUmbrellaHeader(
+ const Module &Mod, SmallVectorImpl<const Module *> &SubMods) {
+ if (Mod.getUmbrellaHeader())
+ SubMods.push_back(&Mod);
+ for (auto *M : Mod.submodules())
+ collectAllSubModulesWithUmbrellaHeader(*M, SubMods);
+}
+
+void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
+ assert(Mod.getUmbrellaHeader() && "Module must use umbrella header");
+ SourceLocation StartLoc =
+ SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
+ if (getDiagnostics().isIgnored(diag::warn_uncovered_module_header, StartLoc))
+ return;
+
+ ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
+ const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry;
+ llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+ std::error_code EC;
+ for (llvm::vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC),
+ End;
+ Entry != End && !EC; Entry.increment(EC)) {
+ using llvm::StringSwitch;
+
+ // Check whether this entry has an extension typically associated with
+ // headers.
+ if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path()))
+ .Cases(".h", ".H", ".hh", ".hpp", true)
+ .Default(false))
+ continue;
+
+ if (auto Header = getFileManager().getFile(Entry->path()))
+ if (!getSourceManager().hasFileInfo(*Header)) {
+ if (!ModMap.isHeaderInUnavailableModule(*Header)) {
+ // Find the relative path that would access this header.
+ SmallString<128> RelativePath;
+ computeRelativePath(FileMgr, Dir, *Header, RelativePath);
+ Diag(StartLoc, diag::warn_uncovered_module_header)
+ << Mod.getFullModuleName() << RelativePath;
+ }
+ }
+ }
+}
+
+/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
+/// the current file. This either returns the EOF token or pops a level off
+/// the include stack and keeps going.
+bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
+ assert(!CurTokenLexer &&
+ "Ending a file when currently in a macro!");
+
+ // If we have an unclosed module region from a pragma at the end of a
+ // module, complain and close it now.
+ const bool LeavingSubmodule = CurLexer && CurLexerSubmodule;
+ if ((LeavingSubmodule || IncludeMacroStack.empty()) &&
+ !BuildingSubmoduleStack.empty() &&
+ BuildingSubmoduleStack.back().IsPragma) {
+ Diag(BuildingSubmoduleStack.back().ImportLoc,
+ diag::err_pp_module_begin_without_module_end);
+ Module *M = LeaveSubmodule(/*ForPragma*/true);
+
+ Result.startToken();
+ const char *EndPos = getCurLexerEndPos();
+ CurLexer->BufferPtr = EndPos;
+ CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
+ Result.setAnnotationEndLoc(Result.getLocation());
+ Result.setAnnotationValue(M);
+ return true;
+ }
+
+ // See if this file had a controlling macro.
+ if (CurPPLexer) { // Not ending a macro, ignore it.
+ if (const IdentifierInfo *ControllingMacro =
+ CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
+ // Okay, this has a controlling macro, remember in HeaderFileInfo.
+ if (const FileEntry *FE = CurPPLexer->getFileEntry()) {
+ HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
+ if (MacroInfo *MI =
+ getMacroInfo(const_cast<IdentifierInfo*>(ControllingMacro)))
+ MI->setUsedForHeaderGuard(true);
+ if (const IdentifierInfo *DefinedMacro =
+ CurPPLexer->MIOpt.GetDefinedMacro()) {
+ if (!isMacroDefined(ControllingMacro) &&
+ DefinedMacro != ControllingMacro &&
+ HeaderInfo.FirstTimeLexingFile(FE)) {
+
+ // If the edit distance between the two macros is more than 50%,
+ // DefinedMacro may not be header guard, or can be header guard of
+ // another header file. Therefore, it maybe defining something
+ // completely different. This can be observed in the wild when
+ // handling feature macros or header guards in different files.
+
+ const StringRef ControllingMacroName = ControllingMacro->getName();
+ const StringRef DefinedMacroName = DefinedMacro->getName();
+ const size_t MaxHalfLength = std::max(ControllingMacroName.size(),
+ DefinedMacroName.size()) / 2;
+ const unsigned ED = ControllingMacroName.edit_distance(
+ DefinedMacroName, true, MaxHalfLength);
+ if (ED <= MaxHalfLength) {
+ // Emit a warning for a bad header guard.
+ Diag(CurPPLexer->MIOpt.GetMacroLocation(),
+ diag::warn_header_guard)
+ << CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro;
+ Diag(CurPPLexer->MIOpt.GetDefinedLocation(),
+ diag::note_header_guard)
+ << CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro
+ << ControllingMacro
+ << FixItHint::CreateReplacement(
+ CurPPLexer->MIOpt.GetDefinedLocation(),
+ ControllingMacro->getName());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Complain about reaching a true EOF within arc_cf_code_audited.
+ // We don't want to complain about reaching the end of a macro
+ // instantiation or a _Pragma.
+ if (PragmaARCCFCodeAuditedInfo.second.isValid() && !isEndOfMacro &&
+ !(CurLexer && CurLexer->Is_PragmaLexer)) {
+ Diag(PragmaARCCFCodeAuditedInfo.second,
+ diag::err_pp_eof_in_arc_cf_code_audited);
+
+ // Recover by leaving immediately.
+ PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
+ }
+
+ // Complain about reaching a true EOF within assume_nonnull.
+ // We don't want to complain about reaching the end of a macro
+ // instantiation or a _Pragma.
+ if (PragmaAssumeNonNullLoc.isValid() &&
+ !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
+ Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);
+
+ // Recover by leaving immediately.
+ PragmaAssumeNonNullLoc = SourceLocation();
+ }
+
+ bool LeavingPCHThroughHeader = false;
+
+ // If this is a #include'd file, pop it off the include stack and continue
+ // lexing the #includer file.
+ if (!IncludeMacroStack.empty()) {
+
+ // If we lexed the code-completion file, act as if we reached EOF.
+ if (isCodeCompletionEnabled() && CurPPLexer &&
+ SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
+ CodeCompletionFileLoc) {
+ assert(CurLexer && "Got EOF but no current lexer set!");
+ Result.startToken();
+ CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
+ CurLexer.reset();
+
+ CurPPLexer = nullptr;
+ recomputeCurLexerKind();
+ return true;
+ }
+
+ if (!isEndOfMacro && CurPPLexer &&
+ SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
+ // Notify SourceManager to record the number of FileIDs that were created
+ // during lexing of the #include'd file.
+ unsigned NumFIDs =
+ SourceMgr.local_sloc_entry_size() -
+ CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
+ SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
+ }
+
+ bool ExitedFromPredefinesFile = false;
+ FileID ExitedFID;
+ if (!isEndOfMacro && CurPPLexer) {
+ ExitedFID = CurPPLexer->getFileID();
+
+ assert(PredefinesFileID.isValid() &&
+ "HandleEndOfFile is called before PredefinesFileId is set");
+ ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
+ }
+
+ if (LeavingSubmodule) {
+ // We're done with this submodule.
+ Module *M = LeaveSubmodule(/*ForPragma*/false);
+
+ // Notify the parser that we've left the module.
+ const char *EndPos = getCurLexerEndPos();
+ Result.startToken();
+ CurLexer->BufferPtr = EndPos;
+ CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
+ Result.setAnnotationEndLoc(Result.getLocation());
+ Result.setAnnotationValue(M);
+ }
+
+ bool FoundPCHThroughHeader = false;
+ if (CurPPLexer && creatingPCHWithThroughHeader() &&
+ isPCHThroughHeader(
+ SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
+ FoundPCHThroughHeader = true;
+
+ // We're done with the #included file.
+ RemoveTopOfLexerStack();
+
+ // Propagate info about start-of-line/leading white-space/etc.
+ PropagateLineStartLeadingSpaceInfo(Result);
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks && !isEndOfMacro && CurPPLexer) {
+ SrcMgr::CharacteristicKind FileType =
+ SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
+ Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
+ PPCallbacks::ExitFile, FileType, ExitedFID);
+ }
+
+ // Restore conditional stack from the preamble right after exiting from the
+ // predefines file.
+ if (ExitedFromPredefinesFile)
+ replayPreambleConditionalStack();
+
+ if (!isEndOfMacro && CurPPLexer && FoundPCHThroughHeader &&
+ (isInPrimaryFile() ||
+ CurPPLexer->getFileID() == getPredefinesFileID())) {
+ // Leaving the through header. Continue directly to end of main file
+ // processing.
+ LeavingPCHThroughHeader = true;
+ } else {
+ // Client should lex another token unless we generated an EOM.
+ return LeavingSubmodule;
+ }
+ }
+
+ // If this is the end of the main file, form an EOF token.
+ assert(CurLexer && "Got EOF but no current lexer set!");
+ const char *EndPos = getCurLexerEndPos();
+ Result.startToken();
+ CurLexer->BufferPtr = EndPos;
+ CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+
+ if (isCodeCompletionEnabled()) {
+ // Inserting the code-completion point increases the source buffer by 1,
+ // but the main FileID was created before inserting the point.
+ // Compensate by reducing the EOF location by 1, otherwise the location
+ // will point to the next FileID.
+ // FIXME: This is hacky, the code-completion point should probably be
+ // inserted before the main FileID is created.
+ if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
+ Result.setLocation(Result.getLocation().getLocWithOffset(-1));
+ }
+
+ if (creatingPCHWithThroughHeader() && !LeavingPCHThroughHeader) {
+ // Reached the end of the compilation without finding the through header.
+ Diag(CurLexer->getFileLoc(), diag::err_pp_through_header_not_seen)
+ << PPOpts->PCHThroughHeader << 0;
+ }
+
+ if (!isIncrementalProcessingEnabled())
+ // We're done with lexing.
+ CurLexer.reset();
+
+ if (!isIncrementalProcessingEnabled())
+ CurPPLexer = nullptr;
+
+ if (TUKind == TU_Complete) {
+ // This is the end of the top-level file. 'WarnUnusedMacroLocs' has
+ // collected all macro locations that we need to warn because they are not
+ // used.
+ for (WarnUnusedMacroLocsTy::iterator
+ I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end();
+ I!=E; ++I)
+ Diag(*I, diag::pp_macro_not_used);
+ }
+
+ // If we are building a module that has an umbrella header, make sure that
+ // each of the headers within the directory, including all submodules, is
+ // covered by the umbrella header was actually included by the umbrella
+ // header.
+ if (Module *Mod = getCurrentModule()) {
+ llvm::SmallVector<const Module *, 4> AllMods;
+ collectAllSubModulesWithUmbrellaHeader(*Mod, AllMods);
+ for (auto *M : AllMods)
+ diagnoseMissingHeaderInUmbrellaDir(*M);
+ }
+
+ return true;
+}
+
+/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
+/// hits the end of its token stream.
+bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
+ assert(CurTokenLexer && !CurPPLexer &&
+ "Ending a macro when currently in a #include file!");
+
+ if (!MacroExpandingLexersStack.empty() &&
+ MacroExpandingLexersStack.back().first == CurTokenLexer.get())
+ removeCachedMacroExpandedTokensOfLastLexer();
+
+ // Delete or cache the now-dead macro expander.
+ if (NumCachedTokenLexers == TokenLexerCacheSize)
+ CurTokenLexer.reset();
+ else
+ TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
+
+ // Handle this like a #include file being popped off the stack.
+ return HandleEndOfFile(Result, true);
+}
+
+/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
+/// lexer stack. This should only be used in situations where the current
+/// state of the top-of-stack lexer is unknown.
+void Preprocessor::RemoveTopOfLexerStack() {
+ assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
+
+ if (CurTokenLexer) {
+ // Delete or cache the now-dead macro expander.
+ if (NumCachedTokenLexers == TokenLexerCacheSize)
+ CurTokenLexer.reset();
+ else
+ TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
+ }
+
+ PopIncludeMacroStack();
+}
+
+/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
+/// comment (/##/) in microsoft mode, this method handles updating the current
+/// state, returning the token on the next source line.
+void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
+ assert(CurTokenLexer && !CurPPLexer &&
+ "Pasted comment can only be formed from macro");
+ // We handle this by scanning for the closest real lexer, switching it to
+ // raw mode and preprocessor mode. This will cause it to return \n as an
+ // explicit EOD token.
+ PreprocessorLexer *FoundLexer = nullptr;
+ bool LexerWasInPPMode = false;
+ for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
+ if (ISI.ThePPLexer == nullptr) continue; // Scan for a real lexer.
+
+ // Once we find a real lexer, mark it as raw mode (disabling macro
+ // expansions) and preprocessor mode (return EOD). We know that the lexer
+ // was *not* in raw mode before, because the macro that the comment came
+ // from was expanded. However, it could have already been in preprocessor
+ // mode (#if COMMENT) in which case we have to return it to that mode and
+ // return EOD.
+ FoundLexer = ISI.ThePPLexer;
+ FoundLexer->LexingRawMode = true;
+ LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
+ FoundLexer->ParsingPreprocessorDirective = true;
+ break;
+ }
+
+ // Okay, we either found and switched over the lexer, or we didn't find a
+ // lexer. In either case, finish off the macro the comment came from, getting
+ // the next token.
+ if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
+
+ // Discarding comments as long as we don't have EOF or EOD. This 'comments
+ // out' the rest of the line, including any tokens that came from other macros
+ // that were active, as in:
+ // #define submacro a COMMENT b
+ // submacro c
+ // which should lex to 'a' only: 'b' and 'c' should be removed.
+ while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
+ Lex(Tok);
+
+ // If we got an eod token, then we successfully found the end of the line.
+ if (Tok.is(tok::eod)) {
+ assert(FoundLexer && "Can't get end of line without an active lexer");
+ // Restore the lexer back to normal mode instead of raw mode.
+ FoundLexer->LexingRawMode = false;
+
+ // If the lexer was already in preprocessor mode, just return the EOD token
+ // to finish the preprocessor line.
+ if (LexerWasInPPMode) return;
+
+ // Otherwise, switch out of PP mode and return the next lexed token.
+ FoundLexer->ParsingPreprocessorDirective = false;
+ return Lex(Tok);
+ }
+
+ // If we got an EOF token, then we reached the end of the token stream but
+ // didn't find an explicit \n. This can only happen if there was no lexer
+ // active (an active lexer would return EOD at EOF if there was no \n in
+ // preprocessor directive mode), so just return EOF as our token.
+ assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
+}
+
+void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc,
+ bool ForPragma) {
+ if (!getLangOpts().ModulesLocalVisibility) {
+ // Just track that we entered this submodule.
+ BuildingSubmoduleStack.push_back(
+ BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
+ PendingModuleMacroNames.size()));
+ if (Callbacks)
+ Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);
+ return;
+ }
+
+ // Resolve as much of the module definition as we can now, before we enter
+ // one of its headers.
+ // FIXME: Can we enable Complain here?
+ // FIXME: Can we do this when local visibility is disabled?
+ ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
+ ModMap.resolveExports(M, /*Complain=*/false);
+ ModMap.resolveUses(M, /*Complain=*/false);
+ ModMap.resolveConflicts(M, /*Complain=*/false);
+
+ // If this is the first time we've entered this module, set up its state.
+ auto R = Submodules.insert(std::make_pair(M, SubmoduleState()));
+ auto &State = R.first->second;
+ bool FirstTime = R.second;
+ if (FirstTime) {
+ // Determine the set of starting macros for this submodule; take these
+ // from the "null" module (the predefines buffer).
+ //
+ // FIXME: If we have local visibility but not modules enabled, the
+ // NullSubmoduleState is polluted by #defines in the top-level source
+ // file.
+ auto &StartingMacros = NullSubmoduleState.Macros;
+
+ // Restore to the starting state.
+ // FIXME: Do this lazily, when each macro name is first referenced.
+ for (auto &Macro : StartingMacros) {
+ // Skip uninteresting macros.
+ if (!Macro.second.getLatest() &&
+ Macro.second.getOverriddenMacros().empty())
+ continue;
+
+ MacroState MS(Macro.second.getLatest());
+ MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
+ State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
+ }
+ }
+
+ // Track that we entered this module.
+ BuildingSubmoduleStack.push_back(
+ BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
+ PendingModuleMacroNames.size()));
+
+ if (Callbacks)
+ Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);
+
+ // Switch to this submodule as the current submodule.
+ CurSubmoduleState = &State;
+
+ // This module is visible to itself.
+ if (FirstTime)
+ makeModuleVisible(M, ImportLoc);
+}
+
+bool Preprocessor::needModuleMacros() const {
+ // If we're not within a submodule, we never need to create ModuleMacros.
+ if (BuildingSubmoduleStack.empty())
+ return false;
+ // If we are tracking module macro visibility even for textually-included
+ // headers, we need ModuleMacros.
+ if (getLangOpts().ModulesLocalVisibility)
+ return true;
+ // Otherwise, we only need module macros if we're actually compiling a module
+ // interface.
+ return getLangOpts().isCompilingModule();
+}
+
+Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
+ if (BuildingSubmoduleStack.empty() ||
+ BuildingSubmoduleStack.back().IsPragma != ForPragma) {
+ assert(ForPragma && "non-pragma module enter/leave mismatch");
+ return nullptr;
+ }
+
+ auto &Info = BuildingSubmoduleStack.back();
+
+ Module *LeavingMod = Info.M;
+ SourceLocation ImportLoc = Info.ImportLoc;
+
+ if (!needModuleMacros() ||
+ (!getLangOpts().ModulesLocalVisibility &&
+ LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
+ // If we don't need module macros, or this is not a module for which we
+ // are tracking macro visibility, don't build any, and preserve the list
+ // of pending names for the surrounding submodule.
+ BuildingSubmoduleStack.pop_back();
+
+ if (Callbacks)
+ Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);
+
+ makeModuleVisible(LeavingMod, ImportLoc);
+ return LeavingMod;
+ }
+
+ // Create ModuleMacros for any macros defined in this submodule.
+ llvm::SmallPtrSet<const IdentifierInfo*, 8> VisitedMacros;
+ for (unsigned I = Info.OuterPendingModuleMacroNames;
+ I != PendingModuleMacroNames.size(); ++I) {
+ auto *II = const_cast<IdentifierInfo*>(PendingModuleMacroNames[I]);
+ if (!VisitedMacros.insert(II).second)
+ continue;
+
+ auto MacroIt = CurSubmoduleState->Macros.find(II);
+ if (MacroIt == CurSubmoduleState->Macros.end())
+ continue;
+ auto &Macro = MacroIt->second;
+
+ // Find the starting point for the MacroDirective chain in this submodule.
+ MacroDirective *OldMD = nullptr;
+ auto *OldState = Info.OuterSubmoduleState;
+ if (getLangOpts().ModulesLocalVisibility)
+ OldState = &NullSubmoduleState;
+ if (OldState && OldState != CurSubmoduleState) {
+ // FIXME: It'd be better to start at the state from when we most recently
+ // entered this submodule, but it doesn't really matter.
+ auto &OldMacros = OldState->Macros;
+ auto OldMacroIt = OldMacros.find(II);
+ if (OldMacroIt == OldMacros.end())
+ OldMD = nullptr;
+ else
+ OldMD = OldMacroIt->second.getLatest();
+ }
+
+ // This module may have exported a new macro. If so, create a ModuleMacro
+ // representing that fact.
+ bool ExplicitlyPublic = false;
+ for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
+ assert(MD && "broken macro directive chain");
+
+ if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
+ // The latest visibility directive for a name in a submodule affects
+ // all the directives that come before it.
+ if (VisMD->isPublic())
+ ExplicitlyPublic = true;
+ else if (!ExplicitlyPublic)
+ // Private with no following public directive: not exported.
+ break;
+ } else {
+ MacroInfo *Def = nullptr;
+ if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+ Def = DefMD->getInfo();
+
+ // FIXME: Issue a warning if multiple headers for the same submodule
+ // define a macro, rather than silently ignoring all but the first.
+ bool IsNew;
+ // Don't bother creating a module macro if it would represent a #undef
+ // that doesn't override anything.
+ if (Def || !Macro.getOverriddenMacros().empty())
+ addModuleMacro(LeavingMod, II, Def,
+ Macro.getOverriddenMacros(), IsNew);
+
+ if (!getLangOpts().ModulesLocalVisibility) {
+ // This macro is exposed to the rest of this compilation as a
+ // ModuleMacro; we don't need to track its MacroDirective any more.
+ Macro.setLatest(nullptr);
+ Macro.setOverriddenMacros(*this, {});
+ }
+ break;
+ }
+ }
+ }
+ PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);
+
+ // FIXME: Before we leave this submodule, we should parse all the other
+ // headers within it. Otherwise, we're left with an inconsistent state
+ // where we've made the module visible but don't yet have its complete
+ // contents.
+
+ // Put back the outer module's state, if we're tracking it.
+ if (getLangOpts().ModulesLocalVisibility)
+ CurSubmoduleState = Info.OuterSubmoduleState;
+
+ BuildingSubmoduleStack.pop_back();
+
+ if (Callbacks)
+ Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);
+
+ // A nested #include makes the included submodule visible.
+ makeModuleVisible(LeavingMod, ImportLoc);
+ return LeavingMod;
+}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
new file mode 100644
index 000000000000..dfbcaedcacff
--- /dev/null
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -0,0 +1,1845 @@
+//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top level handling of macro expansion for the
+// preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Attributes.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/DirectoryLookup.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <ctime>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace clang;
+
+MacroDirective *
+Preprocessor::getLocalMacroDirectiveHistory(const IdentifierInfo *II) const {
+ if (!II->hadMacroDefinition())
+ return nullptr;
+ auto Pos = CurSubmoduleState->Macros.find(II);
+ return Pos == CurSubmoduleState->Macros.end() ? nullptr
+ : Pos->second.getLatest();
+}
+
+void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
+ assert(MD && "MacroDirective should be non-zero!");
+ assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");
+
+ MacroState &StoredMD = CurSubmoduleState->Macros[II];
+ auto *OldMD = StoredMD.getLatest();
+ MD->setPrevious(OldMD);
+ StoredMD.setLatest(MD);
+ StoredMD.overrideActiveModuleMacros(*this, II);
+
+ if (needModuleMacros()) {
+ // Track that we created a new macro directive, so we know we should
+ // consider building a ModuleMacro for it when we get to the end of
+ // the module.
+ PendingModuleMacroNames.push_back(II);
+ }
+
+ // Set up the identifier as having associated macro history.
+ II->setHasMacroDefinition(true);
+ if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
+ II->setHasMacroDefinition(false);
+ if (II->isFromAST())
+ II->setChangedSinceDeserialization();
+}
+
+void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
+ MacroDirective *ED,
+ MacroDirective *MD) {
+ // Normally, when a macro is defined, it goes through appendMacroDirective()
+ // above, which chains a macro to previous defines, undefs, etc.
+ // However, in a pch, the whole macro history up to the end of the pch is
+ // stored, so ASTReader goes through this function instead.
+ // However, built-in macros are already registered in the Preprocessor
+ // ctor, and ASTWriter stops writing the macro chain at built-in macros,
+ // so in that case the chain from the pch needs to be spliced to the existing
+ // built-in.
+
+ assert(II && MD);
+ MacroState &StoredMD = CurSubmoduleState->Macros[II];
+
+ if (auto *OldMD = StoredMD.getLatest()) {
+ // shouldIgnoreMacro() in ASTWriter also stops at macros from the
+ // predefines buffer in module builds. However, in module builds, modules
+ // are loaded completely before predefines are processed, so StoredMD
+ // will be nullptr for them when they're loaded. StoredMD should only be
+ // non-nullptr for builtins read from a pch file.
+ assert(OldMD->getMacroInfo()->isBuiltinMacro() &&
+ "only built-ins should have an entry here");
+ assert(!OldMD->getPrevious() && "builtin should only have a single entry");
+ ED->setPrevious(OldMD);
+ StoredMD.setLatest(MD);
+ } else {
+ StoredMD = MD;
+ }
+
+ // Setup the identifier as having associated macro history.
+ II->setHasMacroDefinition(true);
+ if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
+ II->setHasMacroDefinition(false);
+}
+
+ModuleMacro *Preprocessor::addModuleMacro(Module *Mod, IdentifierInfo *II,
+ MacroInfo *Macro,
+ ArrayRef<ModuleMacro *> Overrides,
+ bool &New) {
+ llvm::FoldingSetNodeID ID;
+ ModuleMacro::Profile(ID, Mod, II);
+
+ void *InsertPos;
+ if (auto *MM = ModuleMacros.FindNodeOrInsertPos(ID, InsertPos)) {
+ New = false;
+ return MM;
+ }
+
+ auto *MM = ModuleMacro::create(*this, Mod, II, Macro, Overrides);
+ ModuleMacros.InsertNode(MM, InsertPos);
+
+ // Each overridden macro is now overridden by one more macro.
+ bool HidAny = false;
+ for (auto *O : Overrides) {
+ HidAny |= (O->NumOverriddenBy == 0);
+ ++O->NumOverriddenBy;
+ }
+
+ // If we were the first overrider for any macro, it's no longer a leaf.
+ auto &LeafMacros = LeafModuleMacros[II];
+ if (HidAny) {
+ LeafMacros.erase(std::remove_if(LeafMacros.begin(), LeafMacros.end(),
+ [](ModuleMacro *MM) {
+ return MM->NumOverriddenBy != 0;
+ }),
+ LeafMacros.end());
+ }
+
+ // The new macro is always a leaf macro.
+ LeafMacros.push_back(MM);
+ // The identifier now has defined macros (that may or may not be visible).
+ II->setHasMacroDefinition(true);
+
+ New = true;
+ return MM;
+}
+
+ModuleMacro *Preprocessor::getModuleMacro(Module *Mod, IdentifierInfo *II) {
+ llvm::FoldingSetNodeID ID;
+ ModuleMacro::Profile(ID, Mod, II);
+
+ void *InsertPos;
+ return ModuleMacros.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+void Preprocessor::updateModuleMacroInfo(const IdentifierInfo *II,
+ ModuleMacroInfo &Info) {
+ assert(Info.ActiveModuleMacrosGeneration !=
+ CurSubmoduleState->VisibleModules.getGeneration() &&
+ "don't need to update this macro name info");
+ Info.ActiveModuleMacrosGeneration =
+ CurSubmoduleState->VisibleModules.getGeneration();
+
+ auto Leaf = LeafModuleMacros.find(II);
+ if (Leaf == LeafModuleMacros.end()) {
+ // No imported macros at all: nothing to do.
+ return;
+ }
+
+ Info.ActiveModuleMacros.clear();
+
+ // Every macro that's locally overridden is overridden by a visible macro.
+ llvm::DenseMap<ModuleMacro *, int> NumHiddenOverrides;
+ for (auto *O : Info.OverriddenMacros)
+ NumHiddenOverrides[O] = -1;
+
+ // Collect all macros that are not overridden by a visible macro.
+ llvm::SmallVector<ModuleMacro *, 16> Worklist;
+ for (auto *LeafMM : Leaf->second) {
+ assert(LeafMM->getNumOverridingMacros() == 0 && "leaf macro overridden");
+ if (NumHiddenOverrides.lookup(LeafMM) == 0)
+ Worklist.push_back(LeafMM);
+ }
+ while (!Worklist.empty()) {
+ auto *MM = Worklist.pop_back_val();
+ if (CurSubmoduleState->VisibleModules.isVisible(MM->getOwningModule())) {
+ // We only care about collecting definitions; undefinitions only act
+ // to override other definitions.
+ if (MM->getMacroInfo())
+ Info.ActiveModuleMacros.push_back(MM);
+ } else {
+ for (auto *O : MM->overrides())
+ if ((unsigned)++NumHiddenOverrides[O] == O->getNumOverridingMacros())
+ Worklist.push_back(O);
+ }
+ }
+ // Our reverse postorder walk found the macros in reverse order.
+ std::reverse(Info.ActiveModuleMacros.begin(), Info.ActiveModuleMacros.end());
+
+ // Determine whether the macro name is ambiguous.
+ MacroInfo *MI = nullptr;
+ bool IsSystemMacro = true;
+ bool IsAmbiguous = false;
+ if (auto *MD = Info.MD) {
+ while (MD && isa<VisibilityMacroDirective>(MD))
+ MD = MD->getPrevious();
+ if (auto *DMD = dyn_cast_or_null<DefMacroDirective>(MD)) {
+ MI = DMD->getInfo();
+ IsSystemMacro &= SourceMgr.isInSystemHeader(DMD->getLocation());
+ }
+ }
+ for (auto *Active : Info.ActiveModuleMacros) {
+ auto *NewMI = Active->getMacroInfo();
+
+ // Before marking the macro as ambiguous, check if this is a case where
+ // both macros are in system headers. If so, we trust that the system
+ // did not get it wrong. This also handles cases where Clang's own
+ // headers have a different spelling of certain system macros:
+ // #define LONG_MAX __LONG_MAX__ (clang's limits.h)
+ // #define LONG_MAX 0x7fffffffffffffffL (system's limits.h)
+ //
+ // FIXME: Remove the defined-in-system-headers check. clang's limits.h
+ // overrides the system limits.h's macros, so there's no conflict here.
+ if (MI && NewMI != MI &&
+ !MI->isIdenticalTo(*NewMI, *this, /*Syntactically=*/true))
+ IsAmbiguous = true;
+ IsSystemMacro &= Active->getOwningModule()->IsSystem ||
+ SourceMgr.isInSystemHeader(NewMI->getDefinitionLoc());
+ MI = NewMI;
+ }
+ Info.IsAmbiguous = IsAmbiguous && !IsSystemMacro;
+}
+
+void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
+ ArrayRef<ModuleMacro*> Leaf;
+ auto LeafIt = LeafModuleMacros.find(II);
+ if (LeafIt != LeafModuleMacros.end())
+ Leaf = LeafIt->second;
+ const MacroState *State = nullptr;
+ auto Pos = CurSubmoduleState->Macros.find(II);
+ if (Pos != CurSubmoduleState->Macros.end())
+ State = &Pos->second;
+
+ llvm::errs() << "MacroState " << State << " " << II->getNameStart();
+ if (State && State->isAmbiguous(*this, II))
+ llvm::errs() << " ambiguous";
+ if (State && !State->getOverriddenMacros().empty()) {
+ llvm::errs() << " overrides";
+ for (auto *O : State->getOverriddenMacros())
+ llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
+ }
+ llvm::errs() << "\n";
+
+ // Dump local macro directives.
+ for (auto *MD = State ? State->getLatest() : nullptr; MD;
+ MD = MD->getPrevious()) {
+ llvm::errs() << " ";
+ MD->dump();
+ }
+
+ // Dump module macros.
+ llvm::DenseSet<ModuleMacro*> Active;
+ for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None)
+ Active.insert(MM);
+ llvm::DenseSet<ModuleMacro*> Visited;
+ llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf.begin(), Leaf.end());
+ while (!Worklist.empty()) {
+ auto *MM = Worklist.pop_back_val();
+ llvm::errs() << " ModuleMacro " << MM << " "
+ << MM->getOwningModule()->getFullModuleName();
+ if (!MM->getMacroInfo())
+ llvm::errs() << " undef";
+
+ if (Active.count(MM))
+ llvm::errs() << " active";
+ else if (!CurSubmoduleState->VisibleModules.isVisible(
+ MM->getOwningModule()))
+ llvm::errs() << " hidden";
+ else if (MM->getMacroInfo())
+ llvm::errs() << " overridden";
+
+ if (!MM->overrides().empty()) {
+ llvm::errs() << " overrides";
+ for (auto *O : MM->overrides()) {
+ llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
+ if (Visited.insert(O).second)
+ Worklist.push_back(O);
+ }
+ }
+ llvm::errs() << "\n";
+ if (auto *MI = MM->getMacroInfo()) {
+ llvm::errs() << " ";
+ MI->dump();
+ llvm::errs() << "\n";
+ }
+ }
+}
+
+/// RegisterBuiltinMacro - Register the specified identifier in the identifier
+/// table and mark it as a builtin macro to be expanded.
+static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
+ // Get the identifier.
+ IdentifierInfo *Id = PP.getIdentifierInfo(Name);
+
+ // Mark it as being a macro that is builtin.
+ MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
+ MI->setIsBuiltinMacro();
+ PP.appendDefMacroDirective(Id, MI);
+ return Id;
+}
+
+/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
+/// identifier table.
+void Preprocessor::RegisterBuiltinMacros() {
+ Ident__LINE__ = RegisterBuiltinMacro(*this, "__LINE__");
+ Ident__FILE__ = RegisterBuiltinMacro(*this, "__FILE__");
+ Ident__DATE__ = RegisterBuiltinMacro(*this, "__DATE__");
+ Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__");
+ Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__");
+ Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma");
+
+ // C++ Standing Document Extensions.
+ if (LangOpts.CPlusPlus)
+ Ident__has_cpp_attribute =
+ RegisterBuiltinMacro(*this, "__has_cpp_attribute");
+ else
+ Ident__has_cpp_attribute = nullptr;
+
+ // GCC Extensions.
+ Ident__BASE_FILE__ = RegisterBuiltinMacro(*this, "__BASE_FILE__");
+ Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro(*this, "__INCLUDE_LEVEL__");
+ Ident__TIMESTAMP__ = RegisterBuiltinMacro(*this, "__TIMESTAMP__");
+
+ // Microsoft Extensions.
+ if (LangOpts.MicrosoftExt) {
+ Ident__identifier = RegisterBuiltinMacro(*this, "__identifier");
+ Ident__pragma = RegisterBuiltinMacro(*this, "__pragma");
+ } else {
+ Ident__identifier = nullptr;
+ Ident__pragma = nullptr;
+ }
+
+ // Clang Extensions.
+ Ident__FILE_NAME__ = RegisterBuiltinMacro(*this, "__FILE_NAME__");
+ Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature");
+ Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension");
+ Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin");
+ Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute");
+ Ident__has_c_attribute = RegisterBuiltinMacro(*this, "__has_c_attribute");
+ Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+ Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
+ Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
+ Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
+ Ident__is_identifier = RegisterBuiltinMacro(*this, "__is_identifier");
+ Ident__is_target_arch = RegisterBuiltinMacro(*this, "__is_target_arch");
+ Ident__is_target_vendor = RegisterBuiltinMacro(*this, "__is_target_vendor");
+ Ident__is_target_os = RegisterBuiltinMacro(*this, "__is_target_os");
+ Ident__is_target_environment =
+ RegisterBuiltinMacro(*this, "__is_target_environment");
+
+ // Modules.
+ Ident__building_module = RegisterBuiltinMacro(*this, "__building_module");
+ if (!LangOpts.CurrentModule.empty())
+ Ident__MODULE__ = RegisterBuiltinMacro(*this, "__MODULE__");
+ else
+ Ident__MODULE__ = nullptr;
+}
+
+/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
+/// in its expansion, currently expands to that token literally.
+static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
+ const IdentifierInfo *MacroIdent,
+ Preprocessor &PP) {
+ IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
+
+ // If the token isn't an identifier, it's always literally expanded.
+ if (!II) return true;
+
+ // If the information about this identifier is out of date, update it from
+ // the external source.
+ if (II->isOutOfDate())
+ PP.getExternalSource()->updateOutOfDateIdentifier(*II);
+
+ // If the identifier is a macro, and if that macro is enabled, it may be
+ // expanded so it's not a trivial expansion.
+ if (auto *ExpansionMI = PP.getMacroInfo(II))
+ if (ExpansionMI->isEnabled() &&
+ // Fast expanding "#define X X" is ok, because X would be disabled.
+ II != MacroIdent)
+ return false;
+
+ // If this is an object-like macro invocation, it is safe to trivially expand
+ // it.
+ if (MI->isObjectLike()) return true;
+
+ // If this is a function-like macro invocation, it's safe to trivially expand
+ // as long as the identifier is not a macro argument.
+ return std::find(MI->param_begin(), MI->param_end(), II) == MI->param_end();
+}
+
+/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
+/// lexed is a '('. If so, consume the token and return true, if not, this
+/// method should have no observable side-effect on the lexed tokens.
+bool Preprocessor::isNextPPTokenLParen() {
+ // Do some quick tests for rejection cases.
+ unsigned Val;
+ if (CurLexer)
+ Val = CurLexer->isNextPPTokenLParen();
+ else
+ Val = CurTokenLexer->isNextTokenLParen();
+
+ if (Val == 2) {
+ // We have run off the end. If it's a source file we don't
+ // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
+ // macro stack.
+ if (CurPPLexer)
+ return false;
+ for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
+ if (Entry.TheLexer)
+ Val = Entry.TheLexer->isNextPPTokenLParen();
+ else
+ Val = Entry.TheTokenLexer->isNextTokenLParen();
+
+ if (Val != 2)
+ break;
+
+ // Ran off the end of a source file?
+ if (Entry.ThePPLexer)
+ return false;
+ }
+ }
+
+ // Okay, if we know that the token is a '(', lex it and return. Otherwise we
+ // have found something that isn't a '(' or we found the end of the
+ // translation unit. In either case, return false.
+ return Val == 1;
+}
+
+/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
+/// expanded as a macro, handle it and return the next token as 'Identifier'.
+bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
+ const MacroDefinition &M) {
+ MacroInfo *MI = M.getMacroInfo();
+
+ // If this is a macro expansion in the "#if !defined(x)" line for the file,
+ // then the macro could expand to different things in other contexts, we need
+ // to disable the optimization in this case.
+ if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();
+
+ // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
+ if (MI->isBuiltinMacro()) {
+ if (Callbacks)
+ Callbacks->MacroExpands(Identifier, M, Identifier.getLocation(),
+ /*Args=*/nullptr);
+ ExpandBuiltinMacro(Identifier);
+ return true;
+ }
+
+ /// Args - If this is a function-like macro expansion, this contains,
+ /// for each macro argument, the list of tokens that were provided to the
+ /// invocation.
+ MacroArgs *Args = nullptr;
+
+ // Remember where the end of the expansion occurred. For an object-like
+ // macro, this is the identifier. For a function-like macro, this is the ')'.
+ SourceLocation ExpansionEnd = Identifier.getLocation();
+
+ // If this is a function-like macro, read the arguments.
+ if (MI->isFunctionLike()) {
+ // Remember that we are now parsing the arguments to a macro invocation.
+ // Preprocessor directives used inside macro arguments are not portable, and
+ // this enables the warning.
+ InMacroArgs = true;
+ ArgMacro = &Identifier;
+
+ Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd);
+
+ // Finished parsing args.
+ InMacroArgs = false;
+ ArgMacro = nullptr;
+
+ // If there was an error parsing the arguments, bail out.
+ if (!Args) return true;
+
+ ++NumFnMacroExpanded;
+ } else {
+ ++NumMacroExpanded;
+ }
+
+ // Notice that this macro has been used.
+ markMacroAsUsed(MI);
+
+ // Remember where the token is expanded.
+ SourceLocation ExpandLoc = Identifier.getLocation();
+ SourceRange ExpansionRange(ExpandLoc, ExpansionEnd);
+
+ if (Callbacks) {
+ if (InMacroArgs) {
+ // We can have macro expansion inside a conditional directive while
+ // reading the function macro arguments. To ensure, in that case, that
+ // MacroExpands callbacks still happen in source order, queue this
+ // callback to have it happen after the function macro callback.
+ DelayedMacroExpandsCallbacks.push_back(
+ MacroExpandsInfo(Identifier, M, ExpansionRange));
+ } else {
+ Callbacks->MacroExpands(Identifier, M, ExpansionRange, Args);
+ if (!DelayedMacroExpandsCallbacks.empty()) {
+ for (const MacroExpandsInfo &Info : DelayedMacroExpandsCallbacks) {
+ // FIXME: We lose macro args info with delayed callback.
+ Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range,
+ /*Args=*/nullptr);
+ }
+ DelayedMacroExpandsCallbacks.clear();
+ }
+ }
+ }
+
+ // If the macro definition is ambiguous, complain.
+ if (M.isAmbiguous()) {
+ Diag(Identifier, diag::warn_pp_ambiguous_macro)
+ << Identifier.getIdentifierInfo();
+ Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
+ << Identifier.getIdentifierInfo();
+ M.forAllDefinitions([&](const MacroInfo *OtherMI) {
+ if (OtherMI != MI)
+ Diag(OtherMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
+ << Identifier.getIdentifierInfo();
+ });
+ }
+
+ // If we started lexing a macro, enter the macro expansion body.
+
+ // If this macro expands to no tokens, don't bother to push it onto the
+ // expansion stack, only to take it right back off.
+ if (MI->getNumTokens() == 0) {
+ // No need for arg info.
+ if (Args) Args->destroy(*this);
+
+ // Propagate whitespace info as if we had pushed, then popped,
+ // a macro context.
+ Identifier.setFlag(Token::LeadingEmptyMacro);
+ PropagateLineStartLeadingSpaceInfo(Identifier);
+ ++NumFastMacroExpanded;
+ return false;
+ } else if (MI->getNumTokens() == 1 &&
+ isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
+ *this)) {
+ // Otherwise, if this macro expands into a single trivially-expanded
+ // token: expand it now. This handles common cases like
+ // "#define VAL 42".
+
+ // No need for arg info.
+ if (Args) Args->destroy(*this);
+
+ // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
+ // identifier to the expanded token.
+ bool isAtStartOfLine = Identifier.isAtStartOfLine();
+ bool hasLeadingSpace = Identifier.hasLeadingSpace();
+
+ // Replace the result token.
+ Identifier = MI->getReplacementToken(0);
+
+ // Restore the StartOfLine/LeadingSpace markers.
+ Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
+ Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
+
+ // Update the tokens location to include both its expansion and physical
+ // locations.
+ SourceLocation Loc =
+ SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc,
+ ExpansionEnd,Identifier.getLength());
+ Identifier.setLocation(Loc);
+
+ // If this is a disabled macro or #define X X, we must mark the result as
+ // unexpandable.
+ if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) {
+ if (MacroInfo *NewMI = getMacroInfo(NewII))
+ if (!NewMI->isEnabled() || NewMI == MI) {
+ Identifier.setFlag(Token::DisableExpand);
+ // Don't warn for "#define X X" like "#define bool bool" from
+ // stdbool.h.
+ if (NewMI != MI || MI->isFunctionLike())
+ Diag(Identifier, diag::pp_disabled_macro_expansion);
+ }
+ }
+
+ // Since this is not an identifier token, it can't be macro expanded, so
+ // we're done.
+ ++NumFastMacroExpanded;
+ return true;
+ }
+
+ // Start expanding the macro.
+ EnterMacro(Identifier, ExpansionEnd, MI, Args);
+ return false;
+}
+
+enum Bracket {
+ Brace,
+ Paren
+};
+
+/// CheckMatchedBrackets - Returns true if the braces and parentheses in the
+/// token vector are properly nested.
+static bool CheckMatchedBrackets(const SmallVectorImpl<Token> &Tokens) {
+ SmallVector<Bracket, 8> Brackets;
+ for (SmallVectorImpl<Token>::const_iterator I = Tokens.begin(),
+ E = Tokens.end();
+ I != E; ++I) {
+ if (I->is(tok::l_paren)) {
+ Brackets.push_back(Paren);
+ } else if (I->is(tok::r_paren)) {
+ if (Brackets.empty() || Brackets.back() == Brace)
+ return false;
+ Brackets.pop_back();
+ } else if (I->is(tok::l_brace)) {
+ Brackets.push_back(Brace);
+ } else if (I->is(tok::r_brace)) {
+ if (Brackets.empty() || Brackets.back() == Paren)
+ return false;
+ Brackets.pop_back();
+ }
+ }
+ return Brackets.empty();
+}
+
+/// GenerateNewArgTokens - Returns true if OldTokens can be converted to a new
+/// vector of tokens in NewTokens. The new number of arguments will be placed
+/// in NumArgs and the ranges which need to surrounded in parentheses will be
+/// in ParenHints.
+/// Returns false if the token stream cannot be changed. If this is because
+/// of an initializer list starting a macro argument, the range of those
+/// initializer lists will be place in InitLists.
+static bool GenerateNewArgTokens(Preprocessor &PP,
+ SmallVectorImpl<Token> &OldTokens,
+ SmallVectorImpl<Token> &NewTokens,
+ unsigned &NumArgs,
+ SmallVectorImpl<SourceRange> &ParenHints,
+ SmallVectorImpl<SourceRange> &InitLists) {
+ if (!CheckMatchedBrackets(OldTokens))
+ return false;
+
+ // Once it is known that the brackets are matched, only a simple count of the
+ // braces is needed.
+ unsigned Braces = 0;
+
+ // First token of a new macro argument.
+ SmallVectorImpl<Token>::iterator ArgStartIterator = OldTokens.begin();
+
+ // First closing brace in a new macro argument. Used to generate
+ // SourceRanges for InitLists.
+ SmallVectorImpl<Token>::iterator ClosingBrace = OldTokens.end();
+ NumArgs = 0;
+ Token TempToken;
+ // Set to true when a macro separator token is found inside a braced list.
+ // If true, the fixed argument spans multiple old arguments and ParenHints
+ // will be updated.
+ bool FoundSeparatorToken = false;
+ for (SmallVectorImpl<Token>::iterator I = OldTokens.begin(),
+ E = OldTokens.end();
+ I != E; ++I) {
+ if (I->is(tok::l_brace)) {
+ ++Braces;
+ } else if (I->is(tok::r_brace)) {
+ --Braces;
+ if (Braces == 0 && ClosingBrace == E && FoundSeparatorToken)
+ ClosingBrace = I;
+ } else if (I->is(tok::eof)) {
+ // EOF token is used to separate macro arguments
+ if (Braces != 0) {
+ // Assume comma separator is actually braced list separator and change
+ // it back to a comma.
+ FoundSeparatorToken = true;
+ I->setKind(tok::comma);
+ I->setLength(1);
+ } else { // Braces == 0
+ // Separator token still separates arguments.
+ ++NumArgs;
+
+ // If the argument starts with a brace, it can't be fixed with
+ // parentheses. A different diagnostic will be given.
+ if (FoundSeparatorToken && ArgStartIterator->is(tok::l_brace)) {
+ InitLists.push_back(
+ SourceRange(ArgStartIterator->getLocation(),
+ PP.getLocForEndOfToken(ClosingBrace->getLocation())));
+ ClosingBrace = E;
+ }
+
+ // Add left paren
+ if (FoundSeparatorToken) {
+ TempToken.startToken();
+ TempToken.setKind(tok::l_paren);
+ TempToken.setLocation(ArgStartIterator->getLocation());
+ TempToken.setLength(0);
+ NewTokens.push_back(TempToken);
+ }
+
+ // Copy over argument tokens
+ NewTokens.insert(NewTokens.end(), ArgStartIterator, I);
+
+ // Add right paren and store the paren locations in ParenHints
+ if (FoundSeparatorToken) {
+ SourceLocation Loc = PP.getLocForEndOfToken((I - 1)->getLocation());
+ TempToken.startToken();
+ TempToken.setKind(tok::r_paren);
+ TempToken.setLocation(Loc);
+ TempToken.setLength(0);
+ NewTokens.push_back(TempToken);
+ ParenHints.push_back(SourceRange(ArgStartIterator->getLocation(),
+ Loc));
+ }
+
+ // Copy separator token
+ NewTokens.push_back(*I);
+
+ // Reset values
+ ArgStartIterator = I + 1;
+ FoundSeparatorToken = false;
+ }
+ }
+ }
+
+ return !ParenHints.empty() && InitLists.empty();
+}
+
+/// ReadFunctionLikeMacroArgs - After reading "MACRO" and knowing that the next
+/// token is the '(' of the macro, this method is invoked to read all of the
+/// actual arguments specified for the macro invocation. This returns null on
+/// error.
+MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName,
+ MacroInfo *MI,
+ SourceLocation &MacroEnd) {
+ // The number of fixed arguments to parse.
+ unsigned NumFixedArgsLeft = MI->getNumParams();
+ bool isVariadic = MI->isVariadic();
+
+ // Outer loop, while there are more arguments, keep reading them.
+ Token Tok;
+
+ // Read arguments as unexpanded tokens. This avoids issues, e.g., where
+ // an argument value in a macro could expand to ',' or '(' or ')'.
+ LexUnexpandedToken(Tok);
+ assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
+
+ // ArgTokens - Build up a list of tokens that make up each argument. Each
+ // argument is separated by an EOF token. Use a SmallVector so we can avoid
+ // heap allocations in the common case.
+ SmallVector<Token, 64> ArgTokens;
+ bool ContainsCodeCompletionTok = false;
+ bool FoundElidedComma = false;
+
+ SourceLocation TooManyArgsLoc;
+
+ unsigned NumActuals = 0;
+ while (Tok.isNot(tok::r_paren)) {
+ if (ContainsCodeCompletionTok && Tok.isOneOf(tok::eof, tok::eod))
+ break;
+
+ assert(Tok.isOneOf(tok::l_paren, tok::comma) &&
+ "only expect argument separators here");
+
+ size_t ArgTokenStart = ArgTokens.size();
+ SourceLocation ArgStartLoc = Tok.getLocation();
+
+ // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
+ // that we already consumed the first one.
+ unsigned NumParens = 0;
+
+ while (true) {
+ // Read arguments as unexpanded tokens. This avoids issues, e.g., where
+ // an argument value in a macro could expand to ',' or '(' or ')'.
+ LexUnexpandedToken(Tok);
+
+ if (Tok.isOneOf(tok::eof, tok::eod)) { // "#if f(<eof>" & "#if f(\n"
+ if (!ContainsCodeCompletionTok) {
+ Diag(MacroName, diag::err_unterm_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ // Do not lose the EOF/EOD. Return it to the client.
+ MacroName = Tok;
+ return nullptr;
+ }
+ // Do not lose the EOF/EOD.
+ auto Toks = std::make_unique<Token[]>(1);
+ Toks[0] = Tok;
+ EnterTokenStream(std::move(Toks), 1, true, /*IsReinject*/ false);
+ break;
+ } else if (Tok.is(tok::r_paren)) {
+ // If we found the ) token, the macro arg list is done.
+ if (NumParens-- == 0) {
+ MacroEnd = Tok.getLocation();
+ if (!ArgTokens.empty() &&
+ ArgTokens.back().commaAfterElided()) {
+ FoundElidedComma = true;
+ }
+ break;
+ }
+ } else if (Tok.is(tok::l_paren)) {
+ ++NumParens;
+ } else if (Tok.is(tok::comma) && NumParens == 0 &&
+ !(Tok.getFlags() & Token::IgnoredComma)) {
+ // In Microsoft-compatibility mode, single commas from nested macro
+ // expansions should not be considered as argument separators. We test
+ // for this with the IgnoredComma token flag above.
+
+ // Comma ends this argument if there are more fixed arguments expected.
+ // However, if this is a variadic macro, and this is part of the
+ // variadic part, then the comma is just an argument token.
+ if (!isVariadic) break;
+ if (NumFixedArgsLeft > 1)
+ break;
+ } else if (Tok.is(tok::comment) && !KeepMacroComments) {
+ // If this is a comment token in the argument list and we're just in
+ // -C mode (not -CC mode), discard the comment.
+ continue;
+ } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
+ // Reading macro arguments can cause macros that we are currently
+ // expanding from to be popped off the expansion stack. Doing so causes
+ // them to be reenabled for expansion. Here we record whether any
+ // identifiers we lex as macro arguments correspond to disabled macros.
+ // If so, we mark the token as noexpand. This is a subtle aspect of
+ // C99 6.10.3.4p2.
+ if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
+ if (!MI->isEnabled())
+ Tok.setFlag(Token::DisableExpand);
+ } else if (Tok.is(tok::code_completion)) {
+ ContainsCodeCompletionTok = true;
+ if (CodeComplete)
+ CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
+ MI, NumActuals);
+ // Don't mark that we reached the code-completion point because the
+ // parser is going to handle the token and there will be another
+ // code-completion callback.
+ }
+
+ ArgTokens.push_back(Tok);
+ }
+
+ // If this was an empty argument list foo(), don't add this as an empty
+ // argument.
+ if (ArgTokens.empty() && Tok.getKind() == tok::r_paren)
+ break;
+
+ // If this is not a variadic macro, and too many args were specified, emit
+ // an error.
+ if (!isVariadic && NumFixedArgsLeft == 0 && TooManyArgsLoc.isInvalid()) {
+ if (ArgTokens.size() != ArgTokenStart)
+ TooManyArgsLoc = ArgTokens[ArgTokenStart].getLocation();
+ else
+ TooManyArgsLoc = ArgStartLoc;
+ }
+
+ // Empty arguments are standard in C99 and C++0x, and are supported as an
+ // extension in other modes.
+ if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99)
+ Diag(Tok, LangOpts.CPlusPlus11 ?
+ diag::warn_cxx98_compat_empty_fnmacro_arg :
+ diag::ext_empty_fnmacro_arg);
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ Token EOFTok;
+ EOFTok.startToken();
+ EOFTok.setKind(tok::eof);
+ EOFTok.setLocation(Tok.getLocation());
+ EOFTok.setLength(0);
+ ArgTokens.push_back(EOFTok);
+ ++NumActuals;
+ if (!ContainsCodeCompletionTok && NumFixedArgsLeft != 0)
+ --NumFixedArgsLeft;
+ }
+
+ // Okay, we either found the r_paren. Check to see if we parsed too few
+ // arguments.
+ unsigned MinArgsExpected = MI->getNumParams();
+
+ // If this is not a variadic macro, and too many args were specified, emit
+ // an error.
+ if (!isVariadic && NumActuals > MinArgsExpected &&
+ !ContainsCodeCompletionTok) {
+ // Emit the diagnostic at the macro name in case there is a missing ).
+ // Emitting it at the , could be far away from the macro name.
+ Diag(TooManyArgsLoc, diag::err_too_many_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+
+ // Commas from braced initializer lists will be treated as argument
+ // separators inside macros. Attempt to correct for this with parentheses.
+ // TODO: See if this can be generalized to angle brackets for templates
+ // inside macro arguments.
+
+ SmallVector<Token, 4> FixedArgTokens;
+ unsigned FixedNumArgs = 0;
+ SmallVector<SourceRange, 4> ParenHints, InitLists;
+ if (!GenerateNewArgTokens(*this, ArgTokens, FixedArgTokens, FixedNumArgs,
+ ParenHints, InitLists)) {
+ if (!InitLists.empty()) {
+ DiagnosticBuilder DB =
+ Diag(MacroName,
+ diag::note_init_list_at_beginning_of_macro_argument);
+ for (SourceRange Range : InitLists)
+ DB << Range;
+ }
+ return nullptr;
+ }
+ if (FixedNumArgs != MinArgsExpected)
+ return nullptr;
+
+ DiagnosticBuilder DB = Diag(MacroName, diag::note_suggest_parens_for_macro);
+ for (SourceRange ParenLocation : ParenHints) {
+ DB << FixItHint::CreateInsertion(ParenLocation.getBegin(), "(");
+ DB << FixItHint::CreateInsertion(ParenLocation.getEnd(), ")");
+ }
+ ArgTokens.swap(FixedArgTokens);
+ NumActuals = FixedNumArgs;
+ }
+
+ // See MacroArgs instance var for description of this.
+ bool isVarargsElided = false;
+
+ if (ContainsCodeCompletionTok) {
+ // Recover from not-fully-formed macro invocation during code-completion.
+ Token EOFTok;
+ EOFTok.startToken();
+ EOFTok.setKind(tok::eof);
+ EOFTok.setLocation(Tok.getLocation());
+ EOFTok.setLength(0);
+ for (; NumActuals < MinArgsExpected; ++NumActuals)
+ ArgTokens.push_back(EOFTok);
+ }
+
+ if (NumActuals < MinArgsExpected) {
+ // There are several cases where too few arguments is ok, handle them now.
+ if (NumActuals == 0 && MinArgsExpected == 1) {
+ // #define A(X) or #define A(...) ---> A()
+
+ // If there is exactly one argument, and that argument is missing,
+ // then we have an empty "()" argument empty list. This is fine, even if
+ // the macro expects one argument (the argument is just empty).
+ isVarargsElided = MI->isVariadic();
+ } else if ((FoundElidedComma || MI->isVariadic()) &&
+ (NumActuals+1 == MinArgsExpected || // A(x, ...) -> A(X)
+ (NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A()
+ // Varargs where the named vararg parameter is missing: OK as extension.
+ // #define A(x, ...)
+ // A("blah")
+ //
+ // If the macro contains the comma pasting extension, the diagnostic
+ // is suppressed; we know we'll get another diagnostic later.
+ if (!MI->hasCommaPasting()) {
+ Diag(Tok, diag::ext_missing_varargs_arg);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ }
+
+ // Remember this occurred, allowing us to elide the comma when used for
+ // cases like:
+ // #define A(x, foo...) blah(a, ## foo)
+ // #define B(x, ...) blah(a, ## __VA_ARGS__)
+ // #define C(...) blah(a, ## __VA_ARGS__)
+ // A(x) B(x) C()
+ isVarargsElided = true;
+ } else if (!ContainsCodeCompletionTok) {
+ // Otherwise, emit the error.
+ Diag(Tok, diag::err_too_few_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ return nullptr;
+ }
+
+ // Add a marker EOF token to the end of the token list for this argument.
+ SourceLocation EndLoc = Tok.getLocation();
+ Tok.startToken();
+ Tok.setKind(tok::eof);
+ Tok.setLocation(EndLoc);
+ Tok.setLength(0);
+ ArgTokens.push_back(Tok);
+
+ // If we expect two arguments, add both as empty.
+ if (NumActuals == 0 && MinArgsExpected == 2)
+ ArgTokens.push_back(Tok);
+
+ } else if (NumActuals > MinArgsExpected && !MI->isVariadic() &&
+ !ContainsCodeCompletionTok) {
+ // Emit the diagnostic at the macro name in case there is a missing ).
+ // Emitting it at the , could be far away from the macro name.
+ Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+ Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+ << MacroName.getIdentifierInfo();
+ return nullptr;
+ }
+
+ return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this);
+}
+
+/// Keeps macro expanded tokens for TokenLexers.
+//
+/// Works like a stack; a TokenLexer adds the macro expanded tokens that is
+/// going to lex in the cache and when it finishes the tokens are removed
+/// from the end of the cache.
+Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer,
+ ArrayRef<Token> tokens) {
+ assert(tokLexer);
+ if (tokens.empty())
+ return nullptr;
+
+ size_t newIndex = MacroExpandedTokens.size();
+ bool cacheNeedsToGrow = tokens.size() >
+ MacroExpandedTokens.capacity()-MacroExpandedTokens.size();
+ MacroExpandedTokens.append(tokens.begin(), tokens.end());
+
+ if (cacheNeedsToGrow) {
+ // Go through all the TokenLexers whose 'Tokens' pointer points in the
+ // buffer and update the pointers to the (potential) new buffer array.
+ for (const auto &Lexer : MacroExpandingLexersStack) {
+ TokenLexer *prevLexer;
+ size_t tokIndex;
+ std::tie(prevLexer, tokIndex) = Lexer;
+ prevLexer->Tokens = MacroExpandedTokens.data() + tokIndex;
+ }
+ }
+
+ MacroExpandingLexersStack.push_back(std::make_pair(tokLexer, newIndex));
+ return MacroExpandedTokens.data() + newIndex;
+}
+
+void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() {
+ assert(!MacroExpandingLexersStack.empty());
+ size_t tokIndex = MacroExpandingLexersStack.back().second;
+ assert(tokIndex < MacroExpandedTokens.size());
+ // Pop the cached macro expanded tokens from the end.
+ MacroExpandedTokens.resize(tokIndex);
+ MacroExpandingLexersStack.pop_back();
+}
+
+/// ComputeDATE_TIME - Compute the current time, enter it into the specified
+/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
+/// the identifier tokens inserted.
+static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
+ Preprocessor &PP) {
+ time_t TT = time(nullptr);
+ struct tm *TM = localtime(&TT);
+
+ static const char * const Months[] = {
+ "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
+ };
+
+ {
+ SmallString<32> TmpBuffer;
+ llvm::raw_svector_ostream TmpStream(TmpBuffer);
+ TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon],
+ TM->tm_mday, TM->tm_year + 1900);
+ Token TmpTok;
+ TmpTok.startToken();
+ PP.CreateString(TmpStream.str(), TmpTok);
+ DATELoc = TmpTok.getLocation();
+ }
+
+ {
+ SmallString<32> TmpBuffer;
+ llvm::raw_svector_ostream TmpStream(TmpBuffer);
+ TmpStream << llvm::format("\"%02d:%02d:%02d\"",
+ TM->tm_hour, TM->tm_min, TM->tm_sec);
+ Token TmpTok;
+ TmpTok.startToken();
+ PP.CreateString(TmpStream.str(), TmpTok);
+ TIMELoc = TmpTok.getLocation();
+ }
+}
+
+/// HasFeature - Return true if we recognize and implement the feature
+/// specified by the identifier as a standard language feature.
+static bool HasFeature(const Preprocessor &PP, StringRef Feature) {
+ const LangOptions &LangOpts = PP.getLangOpts();
+
+ // Normalize the feature name, __foo__ becomes foo.
+ if (Feature.startswith("__") && Feature.endswith("__") && Feature.size() >= 4)
+ Feature = Feature.substr(2, Feature.size() - 4);
+
+#define FEATURE(Name, Predicate) .Case(#Name, Predicate)
+ return llvm::StringSwitch<bool>(Feature)
+#include "clang/Basic/Features.def"
+ .Default(false);
+#undef FEATURE
+}
+
+/// HasExtension - Return true if we recognize and implement the feature
+/// specified by the identifier, either as an extension or a standard language
+/// feature.
+static bool HasExtension(const Preprocessor &PP, StringRef Extension) {
+ if (HasFeature(PP, Extension))
+ return true;
+
+ // If the use of an extension results in an error diagnostic, extensions are
+ // effectively unavailable, so just return false here.
+ if (PP.getDiagnostics().getExtensionHandlingBehavior() >=
+ diag::Severity::Error)
+ return false;
+
+ const LangOptions &LangOpts = PP.getLangOpts();
+
+ // Normalize the extension name, __foo__ becomes foo.
+ if (Extension.startswith("__") && Extension.endswith("__") &&
+ Extension.size() >= 4)
+ Extension = Extension.substr(2, Extension.size() - 4);
+
+ // Because we inherit the feature list from HasFeature, this string switch
+ // must be less restrictive than HasFeature's.
+#define EXTENSION(Name, Predicate) .Case(#Name, Predicate)
+ return llvm::StringSwitch<bool>(Extension)
+#include "clang/Basic/Features.def"
+ .Default(false);
+#undef EXTENSION
+}
+
+/// EvaluateHasIncludeCommon - Process a '__has_include("path")'
+/// or '__has_include_next("path")' expression.
+/// Returns true if successful.
+static bool EvaluateHasIncludeCommon(Token &Tok,
+ IdentifierInfo *II, Preprocessor &PP,
+ const DirectoryLookup *LookupFrom,
+ const FileEntry *LookupFromFile) {
+ // Save the location of the current token. If a '(' is later found, use
+ // that location. If not, use the end of this location instead.
+ SourceLocation LParenLoc = Tok.getLocation();
+
+ // These expressions are only allowed within a preprocessor directive.
+ if (!PP.isParsingIfOrElifDirective()) {
+ PP.Diag(LParenLoc, diag::err_pp_directive_required) << II;
+ // Return a valid identifier token.
+ assert(Tok.is(tok::identifier));
+ Tok.setIdentifierInfo(II);
+ return false;
+ }
+
+ // Get '('. If we don't have a '(', try to form a header-name token.
+ do {
+ if (PP.LexHeaderName(Tok))
+ return false;
+ } while (Tok.getKind() == tok::comment);
+
+ // Ensure we have a '('.
+ if (Tok.isNot(tok::l_paren)) {
+ // No '(', use end of last token.
+ LParenLoc = PP.getLocForEndOfToken(LParenLoc);
+ PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
+ // If the next token looks like a filename or the start of one,
+ // assume it is and process it as such.
+ if (Tok.isNot(tok::header_name))
+ return false;
+ } else {
+ // Save '(' location for possible missing ')' message.
+ LParenLoc = Tok.getLocation();
+ if (PP.LexHeaderName(Tok))
+ return false;
+ }
+
+ if (Tok.isNot(tok::header_name)) {
+ PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+ return false;
+ }
+
+ // Reserve a buffer to get the spelling.
+ SmallString<128> FilenameBuffer;
+ bool Invalid = false;
+ StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
+ if (Invalid)
+ return false;
+
+ SourceLocation FilenameLoc = Tok.getLocation();
+
+ // Get ')'.
+ PP.LexNonComment(Tok);
+
+ // Ensure we have a trailing ).
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(PP.getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
+ << II << tok::r_paren;
+ PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ return false;
+ }
+
+ bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (Filename.empty())
+ return false;
+
+ // Search include directories.
+ const DirectoryLookup *CurDir;
+ Optional<FileEntryRef> File =
+ PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile,
+ CurDir, nullptr, nullptr, nullptr, nullptr, nullptr);
+
+ if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
+ SrcMgr::CharacteristicKind FileType = SrcMgr::C_User;
+ if (File)
+ FileType =
+ PP.getHeaderSearchInfo().getFileDirFlavor(&File->getFileEntry());
+ Callbacks->HasInclude(FilenameLoc, Filename, isAngled, File, FileType);
+ }
+
+ // Get the result value. A result of true means the file exists.
+ return File.hasValue();
+}
+
+/// EvaluateHasInclude - Process a '__has_include("path")' expression.
+/// Returns true if successful.
+static bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II,
+ Preprocessor &PP) {
+ return EvaluateHasIncludeCommon(Tok, II, PP, nullptr, nullptr);
+}
+
+/// EvaluateHasIncludeNext - Process '__has_include_next("path")' expression.
+/// Returns true if successful.
+static bool EvaluateHasIncludeNext(Token &Tok,
+ IdentifierInfo *II, Preprocessor &PP) {
+ // __has_include_next is like __has_include, except that we start
+ // searching after the current found directory. If we can't do this,
+ // issue a diagnostic.
+ // FIXME: Factor out duplication with
+ // Preprocessor::HandleIncludeNextDirective.
+ const DirectoryLookup *Lookup = PP.GetCurDirLookup();
+ const FileEntry *LookupFromFile = nullptr;
+ if (PP.isInPrimaryFile() && PP.getLangOpts().IsHeaderFile) {
+ // If the main file is a header, then it's either for PCH/AST generation,
+ // or libclang opened it. Either way, handle it as a normal include below
+ // and do not complain about __has_include_next.
+ } else if (PP.isInPrimaryFile()) {
+ Lookup = nullptr;
+ PP.Diag(Tok, diag::pp_include_next_in_primary);
+ } else if (PP.getCurrentLexerSubmodule()) {
+ // Start looking up in the directory *after* the one in which the current
+ // file would be found, if any.
+ assert(PP.getCurrentLexer() && "#include_next directive in macro?");
+ LookupFromFile = PP.getCurrentLexer()->getFileEntry();
+ Lookup = nullptr;
+ } else if (!Lookup) {
+ PP.Diag(Tok, diag::pp_include_next_absolute_path);
+ } else {
+ // Start looking up in the next directory.
+ ++Lookup;
+ }
+
+ return EvaluateHasIncludeCommon(Tok, II, PP, Lookup, LookupFromFile);
+}
+
+/// Process single-argument builtin feature-like macros that return
+/// integer values.
+static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
+ Token &Tok, IdentifierInfo *II,
+ Preprocessor &PP,
+ llvm::function_ref<
+ int(Token &Tok,
+ bool &HasLexedNextTok)> Op) {
+ // Parse the initial '('.
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok.getLocation(), diag::err_pp_expected_after) << II
+ << tok::l_paren;
+
+ // Provide a dummy '0' value on output stream to elide further errors.
+ if (!Tok.isOneOf(tok::eof, tok::eod)) {
+ OS << 0;
+ Tok.setKind(tok::numeric_constant);
+ }
+ return;
+ }
+
+ unsigned ParenDepth = 1;
+ SourceLocation LParenLoc = Tok.getLocation();
+ llvm::Optional<int> Result;
+
+ Token ResultTok;
+ bool SuppressDiagnostic = false;
+ while (true) {
+ // Parse next token.
+ PP.LexUnexpandedToken(Tok);
+
+already_lexed:
+ switch (Tok.getKind()) {
+ case tok::eof:
+ case tok::eod:
+ // Don't provide even a dummy value if the eod or eof marker is
+ // reached. Simply provide a diagnostic.
+ PP.Diag(Tok.getLocation(), diag::err_unterm_macro_invoc);
+ return;
+
+ case tok::comma:
+ if (!SuppressDiagnostic) {
+ PP.Diag(Tok.getLocation(), diag::err_too_many_args_in_macro_invoc);
+ SuppressDiagnostic = true;
+ }
+ continue;
+
+ case tok::l_paren:
+ ++ParenDepth;
+ if (Result.hasValue())
+ break;
+ if (!SuppressDiagnostic) {
+ PP.Diag(Tok.getLocation(), diag::err_pp_nested_paren) << II;
+ SuppressDiagnostic = true;
+ }
+ continue;
+
+ case tok::r_paren:
+ if (--ParenDepth > 0)
+ continue;
+
+ // The last ')' has been reached; return the value if one found or
+ // a diagnostic and a dummy value.
+ if (Result.hasValue()) {
+ OS << Result.getValue();
+ // For strict conformance to __has_cpp_attribute rules, use 'L'
+ // suffix for dated literals.
+ if (Result.getValue() > 1)
+ OS << 'L';
+ } else {
+ OS << 0;
+ if (!SuppressDiagnostic)
+ PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc);
+ }
+ Tok.setKind(tok::numeric_constant);
+ return;
+
+ default: {
+ // Parse the macro argument, if one not found so far.
+ if (Result.hasValue())
+ break;
+
+ bool HasLexedNextToken = false;
+ Result = Op(Tok, HasLexedNextToken);
+ ResultTok = Tok;
+ if (HasLexedNextToken)
+ goto already_lexed;
+ continue;
+ }
+ }
+
+ // Diagnose missing ')'.
+ if (!SuppressDiagnostic) {
+ if (auto Diag = PP.Diag(Tok.getLocation(), diag::err_pp_expected_after)) {
+ if (IdentifierInfo *LastII = ResultTok.getIdentifierInfo())
+ Diag << LastII;
+ else
+ Diag << ResultTok.getKind();
+ Diag << tok::r_paren << ResultTok.getLocation();
+ }
+ PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ SuppressDiagnostic = true;
+ }
+ }
+}
+
+/// Helper function to return the IdentifierInfo structure of a Token
+/// or generate a diagnostic if none available.
+static IdentifierInfo *ExpectFeatureIdentifierInfo(Token &Tok,
+ Preprocessor &PP,
+ signed DiagID) {
+ IdentifierInfo *II;
+ if (!Tok.isAnnotation() && (II = Tok.getIdentifierInfo()))
+ return II;
+
+ PP.Diag(Tok.getLocation(), DiagID);
+ return nullptr;
+}
+
+/// Implements the __is_target_arch builtin macro.
+static bool isTargetArch(const TargetInfo &TI, const IdentifierInfo *II) {
+ std::string ArchName = II->getName().lower() + "--";
+ llvm::Triple Arch(ArchName);
+ const llvm::Triple &TT = TI.getTriple();
+ if (TT.isThumb()) {
+ // arm matches thumb or thumbv7. armv7 matches thumbv7.
+ if ((Arch.getSubArch() == llvm::Triple::NoSubArch ||
+ Arch.getSubArch() == TT.getSubArch()) &&
+ ((TT.getArch() == llvm::Triple::thumb &&
+ Arch.getArch() == llvm::Triple::arm) ||
+ (TT.getArch() == llvm::Triple::thumbeb &&
+ Arch.getArch() == llvm::Triple::armeb)))
+ return true;
+ }
+ // Check the parsed arch when it has no sub arch to allow Clang to
+ // match thumb to thumbv7 but to prohibit matching thumbv6 to thumbv7.
+ return (Arch.getSubArch() == llvm::Triple::NoSubArch ||
+ Arch.getSubArch() == TT.getSubArch()) &&
+ Arch.getArch() == TT.getArch();
+}
+
+/// Implements the __is_target_vendor builtin macro.
+static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) {
+ StringRef VendorName = TI.getTriple().getVendorName();
+ if (VendorName.empty())
+ VendorName = "unknown";
+ return VendorName.equals_lower(II->getName());
+}
+
+/// Implements the __is_target_os builtin macro.
+static bool isTargetOS(const TargetInfo &TI, const IdentifierInfo *II) {
+ std::string OSName =
+ (llvm::Twine("unknown-unknown-") + II->getName().lower()).str();
+ llvm::Triple OS(OSName);
+ if (OS.getOS() == llvm::Triple::Darwin) {
+ // Darwin matches macos, ios, etc.
+ return TI.getTriple().isOSDarwin();
+ }
+ return TI.getTriple().getOS() == OS.getOS();
+}
+
+/// Implements the __is_target_environment builtin macro.
+static bool isTargetEnvironment(const TargetInfo &TI,
+ const IdentifierInfo *II) {
+ std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str();
+ llvm::Triple Env(EnvName);
+ return TI.getTriple().getEnvironment() == Env.getEnvironment();
+}
+
+/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
+/// as a builtin macro, handle it and return the next token as 'Tok'.
+void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
+ // Figure out which token this is.
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ assert(II && "Can't be a macro without id info!");
+
+ // If this is an _Pragma or Microsoft __pragma directive, expand it,
+ // invoke the pragma handler, then lex the token after it.
+ if (II == Ident_Pragma)
+ return Handle_Pragma(Tok);
+ else if (II == Ident__pragma) // in non-MS mode this is null
+ return HandleMicrosoft__pragma(Tok);
+
+ ++NumBuiltinMacroExpanded;
+
+ SmallString<128> TmpBuffer;
+ llvm::raw_svector_ostream OS(TmpBuffer);
+
+ // Set up the return result.
+ Tok.setIdentifierInfo(nullptr);
+ Tok.clearFlag(Token::NeedsCleaning);
+ bool IsAtStartOfLine = Tok.isAtStartOfLine();
+ bool HasLeadingSpace = Tok.hasLeadingSpace();
+
+ if (II == Ident__LINE__) {
+ // C99 6.10.8: "__LINE__: The presumed line number (within the current
+ // source file) of the current source line (an integer constant)". This can
+ // be affected by #line.
+ SourceLocation Loc = Tok.getLocation();
+
+ // Advance to the location of the first _, this might not be the first byte
+ // of the token if it starts with an escaped newline.
+ Loc = AdvanceToTokenCharacter(Loc, 0);
+
+ // One wrinkle here is that GCC expands __LINE__ to location of the *end* of
+ // a macro expansion. This doesn't matter for object-like macros, but
+ // can matter for a function-like macro that expands to contain __LINE__.
+ // Skip down through expansion points until we find a file loc for the
+ // end of the expansion history.
+ Loc = SourceMgr.getExpansionRange(Loc).getEnd();
+ PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
+
+ // __LINE__ expands to a simple numeric value.
+ OS << (PLoc.isValid()? PLoc.getLine() : 1);
+ Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__ ||
+ II == Ident__FILE_NAME__) {
+ // C99 6.10.8: "__FILE__: The presumed name of the current source file (a
+ // character string literal)". This can be affected by #line.
+ PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
+
+ // __BASE_FILE__ is a GNU extension that returns the top of the presumed
+ // #include stack instead of the current file.
+ if (II == Ident__BASE_FILE__ && PLoc.isValid()) {
+ SourceLocation NextLoc = PLoc.getIncludeLoc();
+ while (NextLoc.isValid()) {
+ PLoc = SourceMgr.getPresumedLoc(NextLoc);
+ if (PLoc.isInvalid())
+ break;
+
+ NextLoc = PLoc.getIncludeLoc();
+ }
+ }
+
+ // Escape this filename. Turn '\' -> '\\' '"' -> '\"'
+ SmallString<128> FN;
+ if (PLoc.isValid()) {
+ // __FILE_NAME__ is a Clang-specific extension that expands to the
+ // the last part of __FILE__.
+ if (II == Ident__FILE_NAME__) {
+ // Try to get the last path component, failing that return the original
+ // presumed location.
+ StringRef PLFileName = llvm::sys::path::filename(PLoc.getFilename());
+ if (PLFileName != "")
+ FN += PLFileName;
+ else
+ FN += PLoc.getFilename();
+ } else {
+ FN += PLoc.getFilename();
+ }
+ Lexer::Stringify(FN);
+ OS << '"' << FN << '"';
+ }
+ Tok.setKind(tok::string_literal);
+ } else if (II == Ident__DATE__) {
+ Diag(Tok.getLocation(), diag::warn_pp_date_time);
+ if (!DATELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"Mmm dd yyyy\""));
+ Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(),
+ Tok.getLocation(),
+ Tok.getLength()));
+ return;
+ } else if (II == Ident__TIME__) {
+ Diag(Tok.getLocation(), diag::warn_pp_date_time);
+ if (!TIMELoc.isValid())
+ ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+ Tok.setKind(tok::string_literal);
+ Tok.setLength(strlen("\"hh:mm:ss\""));
+ Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(),
+ Tok.getLocation(),
+ Tok.getLength()));
+ return;
+ } else if (II == Ident__INCLUDE_LEVEL__) {
+ // Compute the presumed include depth of this token. This can be affected
+ // by GNU line markers.
+ unsigned Depth = 0;
+
+ PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
+ if (PLoc.isValid()) {
+ PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
+ for (; PLoc.isValid(); ++Depth)
+ PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
+ }
+
+ // __INCLUDE_LEVEL__ expands to a simple numeric value.
+ OS << Depth;
+ Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__TIMESTAMP__) {
+ Diag(Tok.getLocation(), diag::warn_pp_date_time);
+ // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
+ // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
+
+ // Get the file that we are lexing out of. If we're currently lexing from
+ // a macro, dig into the include stack.
+ const FileEntry *CurFile = nullptr;
+ PreprocessorLexer *TheLexer = getCurrentFileLexer();
+
+ if (TheLexer)
+ CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());
+
+ const char *Result;
+ if (CurFile) {
+ time_t TT = CurFile->getModificationTime();
+ struct tm *TM = localtime(&TT);
+ Result = asctime(TM);
+ } else {
+ Result = "??? ??? ?? ??:??:?? ????\n";
+ }
+ // Surround the string with " and strip the trailing newline.
+ OS << '"' << StringRef(Result).drop_back() << '"';
+ Tok.setKind(tok::string_literal);
+ } else if (II == Ident__COUNTER__) {
+ // __COUNTER__ expands to a simple numeric value.
+ OS << CounterValue++;
+ Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__has_feature) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II && HasFeature(*this, II->getName());
+ });
+ } else if (II == Ident__has_extension) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II && HasExtension(*this, II->getName());
+ });
+ } else if (II == Ident__has_builtin) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ const LangOptions &LangOpts = getLangOpts();
+ if (!II)
+ return false;
+ else if (II->getBuiltinID() != 0) {
+ switch (II->getBuiltinID()) {
+ case Builtin::BI__builtin_operator_new:
+ case Builtin::BI__builtin_operator_delete:
+ // denotes date of behavior change to support calling arbitrary
+ // usual allocation and deallocation functions. Required by libc++
+ return 201802;
+ default:
+ return true;
+ }
+ return true;
+ } else if (II->getTokenID() != tok::identifier ||
+ II->hasRevertedTokenIDToIdentifier()) {
+ // Treat all keywords that introduce a custom syntax of the form
+ //
+ // '__some_keyword' '(' [...] ')'
+ //
+ // as being "builtin functions", even if the syntax isn't a valid
+ // function call (for example, because the builtin takes a type
+ // argument).
+ if (II->getName().startswith("__builtin_") ||
+ II->getName().startswith("__is_") ||
+ II->getName().startswith("__has_"))
+ return true;
+ return llvm::StringSwitch<bool>(II->getName())
+ .Case("__array_rank", true)
+ .Case("__array_extent", true)
+ .Case("__reference_binds_to_temporary", true)
+ .Case("__underlying_type", true)
+ .Default(false);
+ } else {
+ return llvm::StringSwitch<bool>(II->getName())
+ // Report builtin templates as being builtins.
+ .Case("__make_integer_seq", LangOpts.CPlusPlus)
+ .Case("__type_pack_element", LangOpts.CPlusPlus)
+ // Likewise for some builtin preprocessor macros.
+ // FIXME: This is inconsistent; we usually suggest detecting
+ // builtin macros via #ifdef. Don't add more cases here.
+ .Case("__is_target_arch", true)
+ .Case("__is_target_vendor", true)
+ .Case("__is_target_os", true)
+ .Case("__is_target_environment", true)
+ .Default(false);
+ }
+ });
+ } else if (II == Ident__is_identifier) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [](Token &Tok, bool &HasLexedNextToken) -> int {
+ return Tok.is(tok::identifier);
+ });
+ } else if (II == Ident__has_attribute) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II ? hasAttribute(AttrSyntax::GNU, nullptr, II,
+ getTargetInfo(), getLangOpts()) : 0;
+ });
+ } else if (II == Ident__has_declspec) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II,
+ getTargetInfo(), getLangOpts()) : 0;
+ });
+ } else if (II == Ident__has_cpp_attribute ||
+ II == Ident__has_c_attribute) {
+ bool IsCXX = II == Ident__has_cpp_attribute;
+ EvaluateFeatureLikeBuiltinMacro(
+ OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *ScopeII = nullptr;
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+ Tok, *this, diag::err_feature_check_malformed);
+ if (!II)
+ return false;
+
+ // It is possible to receive a scope token. Read the "::", if it is
+ // available, and the subsequent identifier.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::coloncolon))
+ HasLexedNextToken = true;
+ else {
+ ScopeII = II;
+ LexUnexpandedToken(Tok);
+ II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ }
+
+ AttrSyntax Syntax = IsCXX ? AttrSyntax::CXX : AttrSyntax::C;
+ return II ? hasAttribute(Syntax, ScopeII, II, getTargetInfo(),
+ getLangOpts())
+ : 0;
+ });
+ } else if (II == Ident__has_include ||
+ II == Ident__has_include_next) {
+ // The argument to these two builtins should be a parenthesized
+ // file name string literal using angle brackets (<>) or
+ // double-quotes ("").
+ bool Value;
+ if (II == Ident__has_include)
+ Value = EvaluateHasInclude(Tok, II, *this);
+ else
+ Value = EvaluateHasIncludeNext(Tok, II, *this);
+
+ if (Tok.isNot(tok::r_paren))
+ return;
+ OS << (int)Value;
+ Tok.setKind(tok::numeric_constant);
+ } else if (II == Ident__has_warning) {
+ // The argument should be a parenthesized string literal.
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ std::string WarningName;
+ SourceLocation StrStartLoc = Tok.getLocation();
+
+ HasLexedNextToken = Tok.is(tok::string_literal);
+ if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
+ /*AllowMacroExpansion=*/false))
+ return false;
+
+ // FIXME: Should we accept "-R..." flags here, or should that be
+ // handled by a separate __has_remark?
+ if (WarningName.size() < 3 || WarningName[0] != '-' ||
+ WarningName[1] != 'W') {
+ Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
+ return false;
+ }
+
+ // Finally, check if the warning flags maps to a diagnostic group.
+ // We construct a SmallVector here to talk to getDiagnosticIDs().
+ // Although we don't use the result, this isn't a hot path, and not
+ // worth special casing.
+ SmallVector<diag::kind, 10> Diags;
+ return !getDiagnostics().getDiagnosticIDs()->
+ getDiagnosticsInGroup(diag::Flavor::WarningOrError,
+ WarningName.substr(2), Diags);
+ });
+ } else if (II == Ident__building_module) {
+ // The argument to this builtin should be an identifier. The
+ // builtin evaluates to 1 when that identifier names the module we are
+ // currently building.
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_expected_id_building_module);
+ return getLangOpts().isCompilingModule() && II &&
+ (II->getName() == getLangOpts().CurrentModule);
+ });
+ } else if (II == Ident__MODULE__) {
+ // The current module as an identifier.
+ OS << getLangOpts().CurrentModule;
+ IdentifierInfo *ModuleII = getIdentifierInfo(getLangOpts().CurrentModule);
+ Tok.setIdentifierInfo(ModuleII);
+ Tok.setKind(ModuleII->getTokenID());
+ } else if (II == Ident__identifier) {
+ SourceLocation Loc = Tok.getLocation();
+
+ // We're expecting '__identifier' '(' identifier ')'. Try to recover
+ // if the parens are missing.
+ LexNonComment(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ // No '(', use end of last token.
+ Diag(getLocForEndOfToken(Loc), diag::err_pp_expected_after)
+ << II << tok::l_paren;
+ // If the next token isn't valid as our argument, we can't recover.
+ if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
+ Tok.setKind(tok::identifier);
+ return;
+ }
+
+ SourceLocation LParenLoc = Tok.getLocation();
+ LexNonComment(Tok);
+
+ if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
+ Tok.setKind(tok::identifier);
+ else {
+ Diag(Tok.getLocation(), diag::err_pp_identifier_arg_not_identifier)
+ << Tok.getKind();
+ // Don't walk past anything that's not a real token.
+ if (Tok.isOneOf(tok::eof, tok::eod) || Tok.isAnnotation())
+ return;
+ }
+
+ // Discard the ')', preserving 'Tok' as our result.
+ Token RParen;
+ LexNonComment(RParen);
+ if (RParen.isNot(tok::r_paren)) {
+ Diag(getLocForEndOfToken(Tok.getLocation()), diag::err_pp_expected_after)
+ << Tok.getKind() << tok::r_paren;
+ Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ }
+ return;
+ } else if (II == Ident__is_target_arch) {
+ EvaluateFeatureLikeBuiltinMacro(
+ OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+ Tok, *this, diag::err_feature_check_malformed);
+ return II && isTargetArch(getTargetInfo(), II);
+ });
+ } else if (II == Ident__is_target_vendor) {
+ EvaluateFeatureLikeBuiltinMacro(
+ OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+ Tok, *this, diag::err_feature_check_malformed);
+ return II && isTargetVendor(getTargetInfo(), II);
+ });
+ } else if (II == Ident__is_target_os) {
+ EvaluateFeatureLikeBuiltinMacro(
+ OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+ Tok, *this, diag::err_feature_check_malformed);
+ return II && isTargetOS(getTargetInfo(), II);
+ });
+ } else if (II == Ident__is_target_environment) {
+ EvaluateFeatureLikeBuiltinMacro(
+ OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(
+ Tok, *this, diag::err_feature_check_malformed);
+ return II && isTargetEnvironment(getTargetInfo(), II);
+ });
+ } else {
+ llvm_unreachable("Unknown identifier!");
+ }
+ CreateString(OS.str(), Tok, Tok.getLocation(), Tok.getLocation());
+ Tok.setFlagValue(Token::StartOfLine, IsAtStartOfLine);
+ Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+}
+
+void Preprocessor::markMacroAsUsed(MacroInfo *MI) {
+ // If the 'used' status changed, and the macro requires 'unused' warning,
+ // remove its SourceLocation from the warn-for-unused-macro locations.
+ if (MI->isWarnIfUnused() && !MI->isUsed())
+ WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
+ MI->setIsUsed(true);
+}
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
new file mode 100644
index 000000000000..79953804b5d3
--- /dev/null
+++ b/clang/lib/Lex/Pragma.cpp
@@ -0,0 +1,1914 @@
+//===- Pragma.cpp - Pragma registration and handling ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PragmaHandler/PragmaTable interfaces and implements
+// pragma related methods of the Preprocessor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Pragma.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+
+// Out-of-line destructor to provide a home for the class.
+PragmaHandler::~PragmaHandler() = default;
+
+//===----------------------------------------------------------------------===//
+// EmptyPragmaHandler Implementation.
+//===----------------------------------------------------------------------===//
+
+EmptyPragmaHandler::EmptyPragmaHandler(StringRef Name) : PragmaHandler(Name) {}
+
+void EmptyPragmaHandler::HandlePragma(Preprocessor &PP,
+ PragmaIntroducer Introducer,
+ Token &FirstToken) {}
+
+//===----------------------------------------------------------------------===//
+// PragmaNamespace Implementation.
+//===----------------------------------------------------------------------===//
+
+PragmaNamespace::~PragmaNamespace() {
+ llvm::DeleteContainerSeconds(Handlers);
+}
+
+/// FindHandler - Check to see if there is already a handler for the
+/// specified name. If not, return the handler for the null identifier if it
+/// exists, otherwise return null. If IgnoreNull is true (the default) then
+/// the null handler isn't returned on failure to match.
+PragmaHandler *PragmaNamespace::FindHandler(StringRef Name,
+ bool IgnoreNull) const {
+ if (PragmaHandler *Handler = Handlers.lookup(Name))
+ return Handler;
+ return IgnoreNull ? nullptr : Handlers.lookup(StringRef());
+}
+
+void PragmaNamespace::AddPragma(PragmaHandler *Handler) {
+ assert(!Handlers.lookup(Handler->getName()) &&
+ "A handler with this name is already registered in this namespace");
+ Handlers[Handler->getName()] = Handler;
+}
+
+void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) {
+ assert(Handlers.lookup(Handler->getName()) &&
+ "Handler not registered in this namespace");
+ Handlers.erase(Handler->getName());
+}
+
+void PragmaNamespace::HandlePragma(Preprocessor &PP,
+ PragmaIntroducer Introducer, Token &Tok) {
+ // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro
+ // expand it, the user can have a STDC #define, that should not affect this.
+ PP.LexUnexpandedToken(Tok);
+
+ // Get the handler for this token. If there is no handler, ignore the pragma.
+ PragmaHandler *Handler
+ = FindHandler(Tok.getIdentifierInfo() ? Tok.getIdentifierInfo()->getName()
+ : StringRef(),
+ /*IgnoreNull=*/false);
+ if (!Handler) {
+ PP.Diag(Tok, diag::warn_pragma_ignored);
+ return;
+ }
+
+ // Otherwise, pass it down.
+ Handler->HandlePragma(PP, Introducer, Tok);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Pragma Directive Handling.
+//===----------------------------------------------------------------------===//
+
+namespace {
+// TokenCollector provides the option to collect tokens that were "read"
+// and return them to the stream to be read later.
+// Currently used when reading _Pragma/__pragma directives.
+struct TokenCollector {
+ Preprocessor &Self;
+ bool Collect;
+ SmallVector<Token, 3> Tokens;
+ Token &Tok;
+
+ void lex() {
+ if (Collect)
+ Tokens.push_back(Tok);
+ Self.Lex(Tok);
+ }
+
+ void revert() {
+ assert(Collect && "did not collect tokens");
+ assert(!Tokens.empty() && "collected unexpected number of tokens");
+
+ // Push the ( "string" ) tokens into the token stream.
+ auto Toks = std::make_unique<Token[]>(Tokens.size());
+ std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get());
+ Toks[Tokens.size() - 1] = Tok;
+ Self.EnterTokenStream(std::move(Toks), Tokens.size(),
+ /*DisableMacroExpansion*/ true,
+ /*IsReinject*/ true);
+
+ // ... and return the pragma token unchanged.
+ Tok = *Tokens.begin();
+ }
+};
+} // namespace
+
+/// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the
+/// rest of the pragma, passing it to the registered pragma handlers.
+void Preprocessor::HandlePragmaDirective(PragmaIntroducer Introducer) {
+ if (Callbacks)
+ Callbacks->PragmaDirective(Introducer.Loc, Introducer.Kind);
+
+ if (!PragmasEnabled)
+ return;
+
+ ++NumPragma;
+
+ // Invoke the first level of pragma handlers which reads the namespace id.
+ Token Tok;
+ PragmaHandlers->HandlePragma(*this, Introducer, Tok);
+
+ // If the pragma handler didn't read the rest of the line, consume it now.
+ if ((CurTokenLexer && CurTokenLexer->isParsingPreprocessorDirective())
+ || (CurPPLexer && CurPPLexer->ParsingPreprocessorDirective))
+ DiscardUntilEndOfDirective();
+}
+
+/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
+/// return the first token after the directive. The _Pragma token has just
+/// been read into 'Tok'.
+void Preprocessor::Handle_Pragma(Token &Tok) {
+ // C11 6.10.3.4/3:
+ // all pragma unary operator expressions within [a completely
+ // macro-replaced preprocessing token sequence] are [...] processed [after
+ // rescanning is complete]
+ //
+ // This means that we execute _Pragma operators in two cases:
+ //
+ // 1) on token sequences that would otherwise be produced as the output of
+ // phase 4 of preprocessing, and
+ // 2) on token sequences formed as the macro-replaced token sequence of a
+ // macro argument
+ //
+ // Case #2 appears to be a wording bug: only _Pragmas that would survive to
+ // the end of phase 4 should actually be executed. Discussion on the WG14
+ // mailing list suggests that a _Pragma operator is notionally checked early,
+ // but only pragmas that survive to the end of phase 4 should be executed.
+ //
+ // In Case #2, we check the syntax now, but then put the tokens back into the
+ // token stream for later consumption.
+
+ TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
+
+ // Remember the pragma token location.
+ SourceLocation PragmaLoc = Tok.getLocation();
+
+ // Read the '('.
+ Toks.lex();
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(PragmaLoc, diag::err__Pragma_malformed);
+ return;
+ }
+
+ // Read the '"..."'.
+ Toks.lex();
+ if (!tok::isStringLiteral(Tok.getKind())) {
+ Diag(PragmaLoc, diag::err__Pragma_malformed);
+ // Skip bad tokens, and the ')', if present.
+ if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eof))
+ Lex(Tok);
+ while (Tok.isNot(tok::r_paren) &&
+ !Tok.isAtStartOfLine() &&
+ Tok.isNot(tok::eof))
+ Lex(Tok);
+ if (Tok.is(tok::r_paren))
+ Lex(Tok);
+ return;
+ }
+
+ if (Tok.hasUDSuffix()) {
+ Diag(Tok, diag::err_invalid_string_udl);
+ // Skip this token, and the ')', if present.
+ Lex(Tok);
+ if (Tok.is(tok::r_paren))
+ Lex(Tok);
+ return;
+ }
+
+ // Remember the string.
+ Token StrTok = Tok;
+
+ // Read the ')'.
+ Toks.lex();
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(PragmaLoc, diag::err__Pragma_malformed);
+ return;
+ }
+
+ // If we're expanding a macro argument, put the tokens back.
+ if (InMacroArgPreExpansion) {
+ Toks.revert();
+ return;
+ }
+
+ SourceLocation RParenLoc = Tok.getLocation();
+ std::string StrVal = getSpelling(StrTok);
+
+ // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1:
+ // "The string literal is destringized by deleting any encoding prefix,
+ // deleting the leading and trailing double-quotes, replacing each escape
+ // sequence \" by a double-quote, and replacing each escape sequence \\ by a
+ // single backslash."
+ if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
+ (StrVal[0] == 'u' && StrVal[1] != '8'))
+ StrVal.erase(StrVal.begin());
+ else if (StrVal[0] == 'u')
+ StrVal.erase(StrVal.begin(), StrVal.begin() + 2);
+
+ if (StrVal[0] == 'R') {
+ // FIXME: C++11 does not specify how to handle raw-string-literals here.
+ // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
+ assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
+ "Invalid raw string token!");
+
+ // Measure the length of the d-char-sequence.
+ unsigned NumDChars = 0;
+ while (StrVal[2 + NumDChars] != '(') {
+ assert(NumDChars < (StrVal.size() - 5) / 2 &&
+ "Invalid raw string token!");
+ ++NumDChars;
+ }
+ assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');
+
+ // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
+ // parens below.
+ StrVal.erase(0, 2 + NumDChars);
+ StrVal.erase(StrVal.size() - 1 - NumDChars);
+ } else {
+ assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+ "Invalid string token!");
+
+ // Remove escaped quotes and escapes.
+ unsigned ResultPos = 1;
+ for (size_t i = 1, e = StrVal.size() - 1; i != e; ++i) {
+ // Skip escapes. \\ -> '\' and \" -> '"'.
+ if (StrVal[i] == '\\' && i + 1 < e &&
+ (StrVal[i + 1] == '\\' || StrVal[i + 1] == '"'))
+ ++i;
+ StrVal[ResultPos++] = StrVal[i];
+ }
+ StrVal.erase(StrVal.begin() + ResultPos, StrVal.end() - 1);
+ }
+
+ // Remove the front quote, replacing it with a space, so that the pragma
+ // contents appear to have a space before them.
+ StrVal[0] = ' ';
+
+ // Replace the terminating quote with a \n.
+ StrVal[StrVal.size()-1] = '\n';
+
+ // Plop the string (including the newline and trailing null) into a buffer
+ // where we can lex it.
+ Token TmpTok;
+ TmpTok.startToken();
+ CreateString(StrVal, TmpTok);
+ SourceLocation TokLoc = TmpTok.getLocation();
+
+ // Make and enter a lexer object so that we lex and expand the tokens just
+ // like any others.
+ Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
+ StrVal.size(), *this);
+
+ EnterSourceFileWithLexer(TL, nullptr);
+
+ // With everything set up, lex this as a #pragma directive.
+ HandlePragmaDirective({PIK__Pragma, PragmaLoc});
+
+ // Finally, return whatever came after the pragma directive.
+ return Lex(Tok);
+}
+
+/// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
+/// is not enclosed within a string literal.
+void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {
+ // During macro pre-expansion, check the syntax now but put the tokens back
+ // into the token stream for later consumption. Same as Handle_Pragma.
+ TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
+
+ // Remember the pragma token location.
+ SourceLocation PragmaLoc = Tok.getLocation();
+
+ // Read the '('.
+ Toks.lex();
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(PragmaLoc, diag::err__Pragma_malformed);
+ return;
+ }
+
+ // Get the tokens enclosed within the __pragma(), as well as the final ')'.
+ SmallVector<Token, 32> PragmaToks;
+ int NumParens = 0;
+ Toks.lex();
+ while (Tok.isNot(tok::eof)) {
+ PragmaToks.push_back(Tok);
+ if (Tok.is(tok::l_paren))
+ NumParens++;
+ else if (Tok.is(tok::r_paren) && NumParens-- == 0)
+ break;
+ Toks.lex();
+ }
+
+ if (Tok.is(tok::eof)) {
+ Diag(PragmaLoc, diag::err_unterminated___pragma);
+ return;
+ }
+
+ // If we're expanding a macro argument, put the tokens back.
+ if (InMacroArgPreExpansion) {
+ Toks.revert();
+ return;
+ }
+
+ PragmaToks.front().setFlag(Token::LeadingSpace);
+
+ // Replace the ')' with an EOD to mark the end of the pragma.
+ PragmaToks.back().setKind(tok::eod);
+
+ Token *TokArray = new Token[PragmaToks.size()];
+ std::copy(PragmaToks.begin(), PragmaToks.end(), TokArray);
+
+ // Push the tokens onto the stack.
+ EnterTokenStream(TokArray, PragmaToks.size(), true, true,
+ /*IsReinject*/ false);
+
+ // With everything set up, lex this as a #pragma directive.
+ HandlePragmaDirective({PIK___pragma, PragmaLoc});
+
+ // Finally, return whatever came after the pragma directive.
+ return Lex(Tok);
+}
+
+/// HandlePragmaOnce - Handle \#pragma once. OnceTok is the 'once'.
+void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
+ // Don't honor the 'once' when handling the primary source file, unless
+ // this is a prefix to a TU, which indicates we're generating a PCH file, or
+ // when the main file is a header (e.g. when -xc-header is provided on the
+ // commandline).
+ if (isInPrimaryFile() && TUKind != TU_Prefix && !getLangOpts().IsHeaderFile) {
+ Diag(OnceTok, diag::pp_pragma_once_in_main_file);
+ return;
+ }
+
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ // Mark the file as a once-only file now.
+ HeaderInfo.MarkFileIncludeOnce(getCurrentFileLexer()->getFileEntry());
+}
+
+void Preprocessor::HandlePragmaMark() {
+ assert(CurPPLexer && "No current lexer?");
+ CurLexer->ReadToEndOfLine();
+}
+
+/// HandlePragmaPoison - Handle \#pragma GCC poison. PoisonTok is the 'poison'.
+void Preprocessor::HandlePragmaPoison() {
+ Token Tok;
+
+ while (true) {
+ // Read the next token to poison. While doing this, pretend that we are
+ // skipping while reading the identifier to poison.
+ // This avoids errors on code like:
+ // #pragma GCC poison X
+ // #pragma GCC poison X
+ if (CurPPLexer) CurPPLexer->LexingRawMode = true;
+ LexUnexpandedToken(Tok);
+ if (CurPPLexer) CurPPLexer->LexingRawMode = false;
+
+ // If we reached the end of line, we're done.
+ if (Tok.is(tok::eod)) return;
+
+ // Can only poison identifiers.
+ if (Tok.isNot(tok::raw_identifier)) {
+ Diag(Tok, diag::err_pp_invalid_poison);
+ return;
+ }
+
+ // Look up the identifier info for the token. We disabled identifier lookup
+ // by saying we're skipping contents, so we need to do this manually.
+ IdentifierInfo *II = LookUpIdentifierInfo(Tok);
+
+ // Already poisoned.
+ if (II->isPoisoned()) continue;
+
+ // If this is a macro identifier, emit a warning.
+ if (isMacroDefined(II))
+ Diag(Tok, diag::pp_poisoning_existing_macro);
+
+ // Finally, poison it!
+ II->setIsPoisoned();
+ if (II->isFromAST())
+ II->setChangedSinceDeserialization();
+ }
+}
+
+/// HandlePragmaSystemHeader - Implement \#pragma GCC system_header. We know
+/// that the whole directive has been parsed.
+void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
+ if (isInPrimaryFile()) {
+ Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file);
+ return;
+ }
+
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ PreprocessorLexer *TheLexer = getCurrentFileLexer();
+
+ // Mark the file as a system header.
+ HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry());
+
+ PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation());
+ if (PLoc.isInvalid())
+ return;
+
+ unsigned FilenameID = SourceMgr.getLineTableFilenameID(PLoc.getFilename());
+
+ // Notify the client, if desired, that we are in a new source file.
+ if (Callbacks)
+ Callbacks->FileChanged(SysHeaderTok.getLocation(),
+ PPCallbacks::SystemHeaderPragma, SrcMgr::C_System);
+
+ // Emit a line marker. This will change any source locations from this point
+ // forward to realize they are in a system header.
+ // Create a line note with this information.
+ SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine() + 1,
+ FilenameID, /*IsEntry=*/false, /*IsExit=*/false,
+ SrcMgr::C_System);
+}
+
+/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
+void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+ Token FilenameTok;
+ if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false))
+ return;
+
+ // If the next token wasn't a header-name, diagnose the error.
+ if (FilenameTok.isNot(tok::header_name)) {
+ Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+ return;
+ }
+
+ // Reserve a buffer to get the spelling.
+ SmallString<128> FilenameBuffer;
+ bool Invalid = false;
+ StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
+ if (Invalid)
+ return;
+
+ bool isAngled =
+ GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+ // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+ // error.
+ if (Filename.empty())
+ return;
+
+ // Search include directories for this file.
+ const DirectoryLookup *CurDir;
+ Optional<FileEntryRef> File =
+ LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr,
+ nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr);
+ if (!File) {
+ if (!SuppressIncludeNotFoundError)
+ Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+ return;
+ }
+
+ const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry();
+
+ // If this file is older than the file it depends on, emit a diagnostic.
+ if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
+ // Lex tokens at the end of the message and include them in the message.
+ std::string Message;
+ Lex(DependencyTok);
+ while (DependencyTok.isNot(tok::eod)) {
+ Message += getSpelling(DependencyTok) + " ";
+ Lex(DependencyTok);
+ }
+
+ // Remove the trailing ' ' if present.
+ if (!Message.empty())
+ Message.erase(Message.end()-1);
+ Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message;
+ }
+}
+
+/// ParsePragmaPushOrPopMacro - Handle parsing of pragma push_macro/pop_macro.
+/// Return the IdentifierInfo* associated with the macro to push or pop.
+IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
+ // Remember the pragma token location.
+ Token PragmaTok = Tok;
+
+ // Read the '('.
+ Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
+ << getSpelling(PragmaTok);
+ return nullptr;
+ }
+
+ // Read the macro name string.
+ Lex(Tok);
+ if (Tok.isNot(tok::string_literal)) {
+ Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
+ << getSpelling(PragmaTok);
+ return nullptr;
+ }
+
+ if (Tok.hasUDSuffix()) {
+ Diag(Tok, diag::err_invalid_string_udl);
+ return nullptr;
+ }
+
+ // Remember the macro string.
+ std::string StrVal = getSpelling(Tok);
+
+ // Read the ')'.
+ Lex(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
+ << getSpelling(PragmaTok);
+ return nullptr;
+ }
+
+ assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+ "Invalid string token!");
+
+ // Create a Token from the string.
+ Token MacroTok;
+ MacroTok.startToken();
+ MacroTok.setKind(tok::raw_identifier);
+ CreateString(StringRef(&StrVal[1], StrVal.size() - 2), MacroTok);
+
+ // Get the IdentifierInfo of MacroToPushTok.
+ return LookUpIdentifierInfo(MacroTok);
+}
+
+/// Handle \#pragma push_macro.
+///
+/// The syntax is:
+/// \code
+/// #pragma push_macro("macro")
+/// \endcode
+void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
+ // Parse the pragma directive and get the macro IdentifierInfo*.
+ IdentifierInfo *IdentInfo = ParsePragmaPushOrPopMacro(PushMacroTok);
+ if (!IdentInfo) return;
+
+ // Get the MacroInfo associated with IdentInfo.
+ MacroInfo *MI = getMacroInfo(IdentInfo);
+
+ if (MI) {
+ // Allow the original MacroInfo to be redefined later.
+ MI->setIsAllowRedefinitionsWithoutWarning(true);
+ }
+
+ // Push the cloned MacroInfo so we can retrieve it later.
+ PragmaPushMacroInfo[IdentInfo].push_back(MI);
+}
+
+/// Handle \#pragma pop_macro.
+///
+/// The syntax is:
+/// \code
+/// #pragma pop_macro("macro")
+/// \endcode
+void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
+ SourceLocation MessageLoc = PopMacroTok.getLocation();
+
+ // Parse the pragma directive and get the macro IdentifierInfo*.
+ IdentifierInfo *IdentInfo = ParsePragmaPushOrPopMacro(PopMacroTok);
+ if (!IdentInfo) return;
+
+ // Find the vector<MacroInfo*> associated with the macro.
+ llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>::iterator iter =
+ PragmaPushMacroInfo.find(IdentInfo);
+ if (iter != PragmaPushMacroInfo.end()) {
+ // Forget the MacroInfo currently associated with IdentInfo.
+ if (MacroInfo *MI = getMacroInfo(IdentInfo)) {
+ if (MI->isWarnIfUnused())
+ WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
+ appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc));
+ }
+
+ // Get the MacroInfo we want to reinstall.
+ MacroInfo *MacroToReInstall = iter->second.back();
+
+ if (MacroToReInstall)
+ // Reinstall the previously pushed macro.
+ appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc);
+
+ // Pop PragmaPushMacroInfo stack.
+ iter->second.pop_back();
+ if (iter->second.empty())
+ PragmaPushMacroInfo.erase(iter);
+ } else {
+ Diag(MessageLoc, diag::warn_pragma_pop_macro_no_push)
+ << IdentInfo->getName();
+ }
+}
+
+void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) {
+ // We will either get a quoted filename or a bracketed filename, and we
+ // have to track which we got. The first filename is the source name,
+ // and the second name is the mapped filename. If the first is quoted,
+ // the second must be as well (cannot mix and match quotes and brackets).
+
+ // Get the open paren
+ Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(Tok, diag::warn_pragma_include_alias_expected) << "(";
+ return;
+ }
+
+ // We expect either a quoted string literal, or a bracketed name
+ Token SourceFilenameTok;
+ if (LexHeaderName(SourceFilenameTok))
+ return;
+
+ StringRef SourceFileName;
+ SmallString<128> FileNameBuffer;
+ if (SourceFilenameTok.is(tok::header_name)) {
+ SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer);
+ } else {
+ Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
+ return;
+ }
+ FileNameBuffer.clear();
+
+ // Now we expect a comma, followed by another include name
+ Lex(Tok);
+ if (Tok.isNot(tok::comma)) {
+ Diag(Tok, diag::warn_pragma_include_alias_expected) << ",";
+ return;
+ }
+
+ Token ReplaceFilenameTok;
+ if (LexHeaderName(ReplaceFilenameTok))
+ return;
+
+ StringRef ReplaceFileName;
+ if (ReplaceFilenameTok.is(tok::header_name)) {
+ ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer);
+ } else {
+ Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
+ return;
+ }
+
+ // Finally, we expect the closing paren
+ Lex(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::warn_pragma_include_alias_expected) << ")";
+ return;
+ }
+
+ // Now that we have the source and target filenames, we need to make sure
+ // they're both of the same type (angled vs non-angled)
+ StringRef OriginalSource = SourceFileName;
+
+ bool SourceIsAngled =
+ GetIncludeFilenameSpelling(SourceFilenameTok.getLocation(),
+ SourceFileName);
+ bool ReplaceIsAngled =
+ GetIncludeFilenameSpelling(ReplaceFilenameTok.getLocation(),
+ ReplaceFileName);
+ if (!SourceFileName.empty() && !ReplaceFileName.empty() &&
+ (SourceIsAngled != ReplaceIsAngled)) {
+ unsigned int DiagID;
+ if (SourceIsAngled)
+ DiagID = diag::warn_pragma_include_alias_mismatch_angle;
+ else
+ DiagID = diag::warn_pragma_include_alias_mismatch_quote;
+
+ Diag(SourceFilenameTok.getLocation(), DiagID)
+ << SourceFileName
+ << ReplaceFileName;
+
+ return;
+ }
+
+ // Now we can let the include handler know about this mapping
+ getHeaderSearchInfo().AddIncludeAlias(OriginalSource, ReplaceFileName);
+}
+
+// Lex a component of a module name: either an identifier or a string literal;
+// for components that can be expressed both ways, the two forms are equivalent.
+static bool LexModuleNameComponent(
+ Preprocessor &PP, Token &Tok,
+ std::pair<IdentifierInfo *, SourceLocation> &ModuleNameComponent,
+ bool First) {
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
+ StringLiteralParser Literal(Tok, PP);
+ if (Literal.hadError)
+ return true;
+ ModuleNameComponent = std::make_pair(
+ PP.getIdentifierInfo(Literal.GetString()), Tok.getLocation());
+ } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) {
+ ModuleNameComponent =
+ std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation());
+ } else {
+ PP.Diag(Tok.getLocation(), diag::err_pp_expected_module_name) << First;
+ return true;
+ }
+ return false;
+}
+
+static bool LexModuleName(
+ Preprocessor &PP, Token &Tok,
+ llvm::SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>>
+ &ModuleName) {
+ while (true) {
+ std::pair<IdentifierInfo*, SourceLocation> NameComponent;
+ if (LexModuleNameComponent(PP, Tok, NameComponent, ModuleName.empty()))
+ return true;
+ ModuleName.push_back(NameComponent);
+
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::period))
+ return false;
+ }
+}
+
+void Preprocessor::HandlePragmaModuleBuild(Token &Tok) {
+ SourceLocation Loc = Tok.getLocation();
+
+ std::pair<IdentifierInfo *, SourceLocation> ModuleNameLoc;
+ if (LexModuleNameComponent(*this, Tok, ModuleNameLoc, true))
+ return;
+ IdentifierInfo *ModuleName = ModuleNameLoc.first;
+
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod)) {
+ Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+ DiscardUntilEndOfDirective();
+ }
+
+ CurLexer->LexingRawMode = true;
+
+ auto TryConsumeIdentifier = [&](StringRef Ident) -> bool {
+ if (Tok.getKind() != tok::raw_identifier ||
+ Tok.getRawIdentifier() != Ident)
+ return false;
+ CurLexer->Lex(Tok);
+ return true;
+ };
+
+ // Scan forward looking for the end of the module.
+ const char *Start = CurLexer->getBufferLocation();
+ const char *End = nullptr;
+ unsigned NestingLevel = 1;
+ while (true) {
+ End = CurLexer->getBufferLocation();
+ CurLexer->Lex(Tok);
+
+ if (Tok.is(tok::eof)) {
+ Diag(Loc, diag::err_pp_module_build_missing_end);
+ break;
+ }
+
+ if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) {
+ // Token was part of module; keep going.
+ continue;
+ }
+
+ // We hit something directive-shaped; check to see if this is the end
+ // of the module build.
+ CurLexer->ParsingPreprocessorDirective = true;
+ CurLexer->Lex(Tok);
+ if (TryConsumeIdentifier("pragma") && TryConsumeIdentifier("clang") &&
+ TryConsumeIdentifier("module")) {
+ if (TryConsumeIdentifier("build"))
+ // #pragma clang module build -> entering a nested module build.
+ ++NestingLevel;
+ else if (TryConsumeIdentifier("endbuild")) {
+ // #pragma clang module endbuild -> leaving a module build.
+ if (--NestingLevel == 0)
+ break;
+ }
+ // We should either be looking at the EOD or more of the current directive
+ // preceding the EOD. Either way we can ignore this token and keep going.
+ assert(Tok.getKind() != tok::eof && "missing EOD before EOF");
+ }
+ }
+
+ CurLexer->LexingRawMode = false;
+
+ // Load the extracted text as a preprocessed module.
+ assert(CurLexer->getBuffer().begin() <= Start &&
+ Start <= CurLexer->getBuffer().end() &&
+ CurLexer->getBuffer().begin() <= End &&
+ End <= CurLexer->getBuffer().end() &&
+ "module source range not contained within same file buffer");
+ TheModuleLoader.loadModuleFromSource(Loc, ModuleName->getName(),
+ StringRef(Start, End - Start));
+}
+
+void Preprocessor::HandlePragmaHdrstop(Token &Tok) {
+ Lex(Tok);
+ if (Tok.is(tok::l_paren)) {
+ Diag(Tok.getLocation(), diag::warn_pp_hdrstop_filename_ignored);
+
+ std::string FileName;
+ if (!LexStringLiteral(Tok, FileName, "pragma hdrstop", false))
+ return;
+
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_expected) << tok::r_paren;
+ return;
+ }
+ Lex(Tok);
+ }
+ if (Tok.isNot(tok::eod))
+ Diag(Tok.getLocation(), diag::ext_pp_extra_tokens_at_eol)
+ << "pragma hdrstop";
+
+ if (creatingPCHWithPragmaHdrStop() &&
+ SourceMgr.isInMainFile(Tok.getLocation())) {
+ assert(CurLexer && "no lexer for #pragma hdrstop processing");
+ Token &Result = Tok;
+ Result.startToken();
+ CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
+ CurLexer->cutOffLexing();
+ }
+ if (usingPCHWithPragmaHdrStop())
+ SkippingUntilPragmaHdrStop = false;
+}
+
+/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
+/// If 'Namespace' is non-null, then it is a token required to exist on the
+/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
+void Preprocessor::AddPragmaHandler(StringRef Namespace,
+ PragmaHandler *Handler) {
+ PragmaNamespace *InsertNS = PragmaHandlers.get();
+
+ // If this is specified to be in a namespace, step down into it.
+ if (!Namespace.empty()) {
+ // If there is already a pragma handler with the name of this namespace,
+ // we either have an error (directive with the same name as a namespace) or
+ // we already have the namespace to insert into.
+ if (PragmaHandler *Existing = PragmaHandlers->FindHandler(Namespace)) {
+ InsertNS = Existing->getIfNamespace();
+ assert(InsertNS != nullptr && "Cannot have a pragma namespace and pragma"
+ " handler with the same name!");
+ } else {
+ // Otherwise, this namespace doesn't exist yet, create and insert the
+ // handler for it.
+ InsertNS = new PragmaNamespace(Namespace);
+ PragmaHandlers->AddPragma(InsertNS);
+ }
+ }
+
+ // Check to make sure we don't already have a pragma for this identifier.
+ assert(!InsertNS->FindHandler(Handler->getName()) &&
+ "Pragma handler already exists for this identifier!");
+ InsertNS->AddPragma(Handler);
+}
+
+/// RemovePragmaHandler - Remove the specific pragma handler from the
+/// preprocessor. If \arg Namespace is non-null, then it should be the
+/// namespace that \arg Handler was added to. It is an error to remove
+/// a handler that has not been registered.
+void Preprocessor::RemovePragmaHandler(StringRef Namespace,
+ PragmaHandler *Handler) {
+ PragmaNamespace *NS = PragmaHandlers.get();
+
+ // If this is specified to be in a namespace, step down into it.
+ if (!Namespace.empty()) {
+ PragmaHandler *Existing = PragmaHandlers->FindHandler(Namespace);
+ assert(Existing && "Namespace containing handler does not exist!");
+
+ NS = Existing->getIfNamespace();
+ assert(NS && "Invalid namespace, registered as a regular pragma handler!");
+ }
+
+ NS->RemovePragmaHandler(Handler);
+
+ // If this is a non-default namespace and it is now empty, remove it.
+ if (NS != PragmaHandlers.get() && NS->IsEmpty()) {
+ PragmaHandlers->RemovePragmaHandler(NS);
+ delete NS;
+ }
+}
+
+bool Preprocessor::LexOnOffSwitch(tok::OnOffSwitch &Result) {
+ Token Tok;
+ LexUnexpandedToken(Tok);
+
+ if (Tok.isNot(tok::identifier)) {
+ Diag(Tok, diag::ext_on_off_switch_syntax);
+ return true;
+ }
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ if (II->isStr("ON"))
+ Result = tok::OOS_ON;
+ else if (II->isStr("OFF"))
+ Result = tok::OOS_OFF;
+ else if (II->isStr("DEFAULT"))
+ Result = tok::OOS_DEFAULT;
+ else {
+ Diag(Tok, diag::ext_on_off_switch_syntax);
+ return true;
+ }
+
+ // Verify that this is followed by EOD.
+ LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ Diag(Tok, diag::ext_pragma_syntax_eod);
+ return false;
+}
+
+namespace {
+
+/// PragmaOnceHandler - "\#pragma once" marks the file as atomically included.
+struct PragmaOnceHandler : public PragmaHandler {
+ PragmaOnceHandler() : PragmaHandler("once") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &OnceTok) override {
+ PP.CheckEndOfDirective("pragma once");
+ PP.HandlePragmaOnce(OnceTok);
+ }
+};
+
+/// PragmaMarkHandler - "\#pragma mark ..." is ignored by the compiler, and the
+/// rest of the line is not lexed.
+struct PragmaMarkHandler : public PragmaHandler {
+ PragmaMarkHandler() : PragmaHandler("mark") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &MarkTok) override {
+ PP.HandlePragmaMark();
+ }
+};
+
+/// PragmaPoisonHandler - "\#pragma poison x" marks x as not usable.
+struct PragmaPoisonHandler : public PragmaHandler {
+ PragmaPoisonHandler() : PragmaHandler("poison") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &PoisonTok) override {
+ PP.HandlePragmaPoison();
+ }
+};
+
+/// PragmaSystemHeaderHandler - "\#pragma system_header" marks the current file
+/// as a system header, which silences warnings in it.
+struct PragmaSystemHeaderHandler : public PragmaHandler {
+ PragmaSystemHeaderHandler() : PragmaHandler("system_header") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &SHToken) override {
+ PP.HandlePragmaSystemHeader(SHToken);
+ PP.CheckEndOfDirective("pragma");
+ }
+};
+
+struct PragmaDependencyHandler : public PragmaHandler {
+ PragmaDependencyHandler() : PragmaHandler("dependency") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &DepToken) override {
+ PP.HandlePragmaDependency(DepToken);
+ }
+};
+
+struct PragmaDebugHandler : public PragmaHandler {
+ PragmaDebugHandler() : PragmaHandler("__debug") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &DebugToken) override {
+ Token Tok;
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::identifier)) {
+ PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
+ return;
+ }
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (II->isStr("assert")) {
+ llvm_unreachable("This is an assertion!");
+ } else if (II->isStr("crash")) {
+ LLVM_BUILTIN_TRAP;
+ } else if (II->isStr("parser_crash")) {
+ Token Crasher;
+ Crasher.startToken();
+ Crasher.setKind(tok::annot_pragma_parser_crash);
+ Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
+ PP.EnterToken(Crasher, /*IsReinject*/false);
+ } else if (II->isStr("dump")) {
+ Token Identifier;
+ PP.LexUnexpandedToken(Identifier);
+ if (auto *DumpII = Identifier.getIdentifierInfo()) {
+ Token DumpAnnot;
+ DumpAnnot.startToken();
+ DumpAnnot.setKind(tok::annot_pragma_dump);
+ DumpAnnot.setAnnotationRange(
+ SourceRange(Tok.getLocation(), Identifier.getLocation()));
+ DumpAnnot.setAnnotationValue(DumpII);
+ PP.DiscardUntilEndOfDirective();
+ PP.EnterToken(DumpAnnot, /*IsReinject*/false);
+ } else {
+ PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument)
+ << II->getName();
+ }
+ } else if (II->isStr("diag_mapping")) {
+ Token DiagName;
+ PP.LexUnexpandedToken(DiagName);
+ if (DiagName.is(tok::eod))
+ PP.getDiagnostics().dump();
+ else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) {
+ StringLiteralParser Literal(DiagName, PP);
+ if (Literal.hadError)
+ return;
+ PP.getDiagnostics().dump(Literal.GetString());
+ } else {
+ PP.Diag(DiagName, diag::warn_pragma_debug_missing_argument)
+ << II->getName();
+ }
+ } else if (II->isStr("llvm_fatal_error")) {
+ llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
+ } else if (II->isStr("llvm_unreachable")) {
+ llvm_unreachable("#pragma clang __debug llvm_unreachable");
+ } else if (II->isStr("macro")) {
+ Token MacroName;
+ PP.LexUnexpandedToken(MacroName);
+ auto *MacroII = MacroName.getIdentifierInfo();
+ if (MacroII)
+ PP.dumpMacroInfo(MacroII);
+ else
+ PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument)
+ << II->getName();
+ } else if (II->isStr("module_map")) {
+ llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+ ModuleName;
+ if (LexModuleName(PP, Tok, ModuleName))
+ return;
+ ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap();
+ Module *M = nullptr;
+ for (auto IIAndLoc : ModuleName) {
+ M = MM.lookupModuleQualified(IIAndLoc.first->getName(), M);
+ if (!M) {
+ PP.Diag(IIAndLoc.second, diag::warn_pragma_debug_unknown_module)
+ << IIAndLoc.first;
+ return;
+ }
+ }
+ M->dump();
+ } else if (II->isStr("overflow_stack")) {
+ DebugOverflowStack();
+ } else if (II->isStr("handle_crash")) {
+ llvm::CrashRecoveryContext *CRC =llvm::CrashRecoveryContext::GetCurrent();
+ if (CRC)
+ CRC->HandleCrash();
+ } else if (II->isStr("captured")) {
+ HandleCaptured(PP);
+ } else {
+ PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)
+ << II->getName();
+ }
+
+ PPCallbacks *Callbacks = PP.getPPCallbacks();
+ if (Callbacks)
+ Callbacks->PragmaDebug(Tok.getLocation(), II->getName());
+ }
+
+ void HandleCaptured(Preprocessor &PP) {
+ Token Tok;
+ PP.LexUnexpandedToken(Tok);
+
+ if (Tok.isNot(tok::eod)) {
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol)
+ << "pragma clang __debug captured";
+ return;
+ }
+
+ SourceLocation NameLoc = Tok.getLocation();
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(1), 1);
+ Toks[0].startToken();
+ Toks[0].setKind(tok::annot_pragma_captured);
+ Toks[0].setLocation(NameLoc);
+
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true,
+ /*IsReinject=*/false);
+ }
+
+// Disable MSVC warning about runtime stack overflow.
+#ifdef _MSC_VER
+ #pragma warning(disable : 4717)
+#endif
+ static void DebugOverflowStack(void (*P)() = nullptr) {
+ void (*volatile Self)(void(*P)()) = DebugOverflowStack;
+ Self(reinterpret_cast<void(*)()>(Self));
+ }
+#ifdef _MSC_VER
+ #pragma warning(default : 4717)
+#endif
+};
+
+/// PragmaDiagnosticHandler - e.g. '\#pragma GCC diagnostic ignored "-Wformat"'
+struct PragmaDiagnosticHandler : public PragmaHandler {
+private:
+ const char *Namespace;
+
+public:
+ explicit PragmaDiagnosticHandler(const char *NS)
+ : PragmaHandler("diagnostic"), Namespace(NS) {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &DiagToken) override {
+ SourceLocation DiagLoc = DiagToken.getLocation();
+ Token Tok;
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::identifier)) {
+ PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
+ return;
+ }
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ PPCallbacks *Callbacks = PP.getPPCallbacks();
+
+ if (II->isStr("pop")) {
+ if (!PP.getDiagnostics().popMappings(DiagLoc))
+ PP.Diag(Tok, diag::warn_pragma_diagnostic_cannot_pop);
+ else if (Callbacks)
+ Callbacks->PragmaDiagnosticPop(DiagLoc, Namespace);
+ return;
+ } else if (II->isStr("push")) {
+ PP.getDiagnostics().pushMappings(DiagLoc);
+ if (Callbacks)
+ Callbacks->PragmaDiagnosticPush(DiagLoc, Namespace);
+ return;
+ }
+
+ diag::Severity SV = llvm::StringSwitch<diag::Severity>(II->getName())
+ .Case("ignored", diag::Severity::Ignored)
+ .Case("warning", diag::Severity::Warning)
+ .Case("error", diag::Severity::Error)
+ .Case("fatal", diag::Severity::Fatal)
+ .Default(diag::Severity());
+
+ if (SV == diag::Severity()) {
+ PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
+ return;
+ }
+
+ PP.LexUnexpandedToken(Tok);
+ SourceLocation StringLoc = Tok.getLocation();
+
+ std::string WarningName;
+ if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic",
+ /*AllowMacroExpansion=*/false))
+ return;
+
+ if (Tok.isNot(tok::eod)) {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
+ return;
+ }
+
+ if (WarningName.size() < 3 || WarningName[0] != '-' ||
+ (WarningName[1] != 'W' && WarningName[1] != 'R')) {
+ PP.Diag(StringLoc, diag::warn_pragma_diagnostic_invalid_option);
+ return;
+ }
+
+ diag::Flavor Flavor = WarningName[1] == 'W' ? diag::Flavor::WarningOrError
+ : diag::Flavor::Remark;
+ StringRef Group = StringRef(WarningName).substr(2);
+ bool unknownDiag = false;
+ if (Group == "everything") {
+ // Special handling for pragma clang diagnostic ... "-Weverything".
+ // There is no formal group named "everything", so there has to be a
+ // special case for it.
+ PP.getDiagnostics().setSeverityForAll(Flavor, SV, DiagLoc);
+ } else
+ unknownDiag = PP.getDiagnostics().setSeverityForGroup(Flavor, Group, SV,
+ DiagLoc);
+ if (unknownDiag)
+ PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning)
+ << WarningName;
+ else if (Callbacks)
+ Callbacks->PragmaDiagnostic(DiagLoc, Namespace, SV, WarningName);
+ }
+};
+
+/// "\#pragma hdrstop [<header-name-string>]"
+struct PragmaHdrstopHandler : public PragmaHandler {
+ PragmaHdrstopHandler() : PragmaHandler("hdrstop") {}
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &DepToken) override {
+ PP.HandlePragmaHdrstop(DepToken);
+ }
+};
+
+/// "\#pragma warning(...)". MSVC's diagnostics do not map cleanly to clang's
+/// diagnostics, so we don't really implement this pragma. We parse it and
+/// ignore it to avoid -Wunknown-pragma warnings.
+struct PragmaWarningHandler : public PragmaHandler {
+ PragmaWarningHandler() : PragmaHandler("warning") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ // Parse things like:
+ // warning(push, 1)
+ // warning(pop)
+ // warning(disable : 1 2 3 ; error : 4 5 6 ; suppress : 7 8 9)
+ SourceLocation DiagLoc = Tok.getLocation();
+ PPCallbacks *Callbacks = PP.getPPCallbacks();
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok, diag::warn_pragma_warning_expected) << "(";
+ return;
+ }
+
+ PP.Lex(Tok);
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (II && II->isStr("push")) {
+ // #pragma warning( push[ ,n ] )
+ int Level = -1;
+ PP.Lex(Tok);
+ if (Tok.is(tok::comma)) {
+ PP.Lex(Tok);
+ uint64_t Value;
+ if (Tok.is(tok::numeric_constant) &&
+ PP.parseSimpleIntegerLiteral(Tok, Value))
+ Level = int(Value);
+ if (Level < 0 || Level > 4) {
+ PP.Diag(Tok, diag::warn_pragma_warning_push_level);
+ return;
+ }
+ }
+ if (Callbacks)
+ Callbacks->PragmaWarningPush(DiagLoc, Level);
+ } else if (II && II->isStr("pop")) {
+ // #pragma warning( pop )
+ PP.Lex(Tok);
+ if (Callbacks)
+ Callbacks->PragmaWarningPop(DiagLoc);
+ } else {
+ // #pragma warning( warning-specifier : warning-number-list
+ // [; warning-specifier : warning-number-list...] )
+ while (true) {
+ II = Tok.getIdentifierInfo();
+ if (!II && !Tok.is(tok::numeric_constant)) {
+ PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
+ return;
+ }
+
+ // Figure out which warning specifier this is.
+ bool SpecifierValid;
+ StringRef Specifier;
+ llvm::SmallString<1> SpecifierBuf;
+ if (II) {
+ Specifier = II->getName();
+ SpecifierValid = llvm::StringSwitch<bool>(Specifier)
+ .Cases("default", "disable", "error", "once",
+ "suppress", true)
+ .Default(false);
+ // If we read a correct specifier, snatch next token (that should be
+ // ":", checked later).
+ if (SpecifierValid)
+ PP.Lex(Tok);
+ } else {
+ // Token is a numeric constant. It should be either 1, 2, 3 or 4.
+ uint64_t Value;
+ Specifier = PP.getSpelling(Tok, SpecifierBuf);
+ if (PP.parseSimpleIntegerLiteral(Tok, Value)) {
+ SpecifierValid = (Value >= 1) && (Value <= 4);
+ } else
+ SpecifierValid = false;
+ // Next token already snatched by parseSimpleIntegerLiteral.
+ }
+
+ if (!SpecifierValid) {
+ PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
+ return;
+ }
+ if (Tok.isNot(tok::colon)) {
+ PP.Diag(Tok, diag::warn_pragma_warning_expected) << ":";
+ return;
+ }
+
+ // Collect the warning ids.
+ SmallVector<int, 4> Ids;
+ PP.Lex(Tok);
+ while (Tok.is(tok::numeric_constant)) {
+ uint64_t Value;
+ if (!PP.parseSimpleIntegerLiteral(Tok, Value) || Value == 0 ||
+ Value > std::numeric_limits<int>::max()) {
+ PP.Diag(Tok, diag::warn_pragma_warning_expected_number);
+ return;
+ }
+ Ids.push_back(int(Value));
+ }
+ if (Callbacks)
+ Callbacks->PragmaWarning(DiagLoc, Specifier, Ids);
+
+ // Parse the next specifier if there is a semicolon.
+ if (Tok.isNot(tok::semi))
+ break;
+ PP.Lex(Tok);
+ }
+ }
+
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok, diag::warn_pragma_warning_expected) << ")";
+ return;
+ }
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma warning";
+ }
+};
+
+/// "\#pragma execution_character_set(...)". MSVC supports this pragma only
+/// for "UTF-8". We parse it and ignore it if UTF-8 is provided and warn
+/// otherwise to avoid -Wunknown-pragma warnings.
+struct PragmaExecCharsetHandler : public PragmaHandler {
+ PragmaExecCharsetHandler() : PragmaHandler("execution_character_set") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ // Parse things like:
+ // execution_character_set(push, "UTF-8")
+ // execution_character_set(pop)
+ SourceLocation DiagLoc = Tok.getLocation();
+ PPCallbacks *Callbacks = PP.getPPCallbacks();
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << "(";
+ return;
+ }
+
+ PP.Lex(Tok);
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (II && II->isStr("push")) {
+ // #pragma execution_character_set( push[ , string ] )
+ PP.Lex(Tok);
+ if (Tok.is(tok::comma)) {
+ PP.Lex(Tok);
+
+ std::string ExecCharset;
+ if (!PP.FinishLexStringLiteral(Tok, ExecCharset,
+ "pragma execution_character_set",
+ /*AllowMacroExpansion=*/false))
+ return;
+
+ // MSVC supports either of these, but nothing else.
+ if (ExecCharset != "UTF-8" && ExecCharset != "utf-8") {
+ PP.Diag(Tok, diag::warn_pragma_exec_charset_push_invalid) << ExecCharset;
+ return;
+ }
+ }
+ if (Callbacks)
+ Callbacks->PragmaExecCharsetPush(DiagLoc, "UTF-8");
+ } else if (II && II->isStr("pop")) {
+ // #pragma execution_character_set( pop )
+ PP.Lex(Tok);
+ if (Callbacks)
+ Callbacks->PragmaExecCharsetPop(DiagLoc);
+ } else {
+ PP.Diag(Tok, diag::warn_pragma_exec_charset_spec_invalid);
+ return;
+ }
+
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << ")";
+ return;
+ }
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma execution_character_set";
+ }
+};
+
+/// PragmaIncludeAliasHandler - "\#pragma include_alias("...")".
+struct PragmaIncludeAliasHandler : public PragmaHandler {
+ PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &IncludeAliasTok) override {
+ PP.HandlePragmaIncludeAlias(IncludeAliasTok);
+ }
+};
+
+/// PragmaMessageHandler - Handle the microsoft and gcc \#pragma message
+/// extension. The syntax is:
+/// \code
+/// #pragma message(string)
+/// \endcode
+/// OR, in GCC mode:
+/// \code
+/// #pragma message string
+/// \endcode
+/// string is a string, which is fully macro expanded, and permits string
+/// concatenation, embedded escape characters, etc... See MSDN for more details.
+/// Also handles \#pragma GCC warning and \#pragma GCC error which take the same
+/// form as \#pragma message.
+struct PragmaMessageHandler : public PragmaHandler {
+private:
+ const PPCallbacks::PragmaMessageKind Kind;
+ const StringRef Namespace;
+
+ static const char* PragmaKind(PPCallbacks::PragmaMessageKind Kind,
+ bool PragmaNameOnly = false) {
+ switch (Kind) {
+ case PPCallbacks::PMK_Message:
+ return PragmaNameOnly ? "message" : "pragma message";
+ case PPCallbacks::PMK_Warning:
+ return PragmaNameOnly ? "warning" : "pragma warning";
+ case PPCallbacks::PMK_Error:
+ return PragmaNameOnly ? "error" : "pragma error";
+ }
+ llvm_unreachable("Unknown PragmaMessageKind!");
+ }
+
+public:
+ PragmaMessageHandler(PPCallbacks::PragmaMessageKind Kind,
+ StringRef Namespace = StringRef())
+ : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind),
+ Namespace(Namespace) {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ SourceLocation MessageLoc = Tok.getLocation();
+ PP.Lex(Tok);
+ bool ExpectClosingParen = false;
+ switch (Tok.getKind()) {
+ case tok::l_paren:
+ // We have a MSVC style pragma message.
+ ExpectClosingParen = true;
+ // Read the string.
+ PP.Lex(Tok);
+ break;
+ case tok::string_literal:
+ // We have a GCC style pragma message, and we just read the string.
+ break;
+ default:
+ PP.Diag(MessageLoc, diag::err_pragma_message_malformed) << Kind;
+ return;
+ }
+
+ std::string MessageString;
+ if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind),
+ /*AllowMacroExpansion=*/true))
+ return;
+
+ if (ExpectClosingParen) {
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok.getLocation(), diag::err_pragma_message_malformed) << Kind;
+ return;
+ }
+ PP.Lex(Tok); // eat the r_paren.
+ }
+
+ if (Tok.isNot(tok::eod)) {
+ PP.Diag(Tok.getLocation(), diag::err_pragma_message_malformed) << Kind;
+ return;
+ }
+
+ // Output the message.
+ PP.Diag(MessageLoc, (Kind == PPCallbacks::PMK_Error)
+ ? diag::err_pragma_message
+ : diag::warn_pragma_message) << MessageString;
+
+ // If the pragma is lexically sound, notify any interested PPCallbacks.
+ if (PPCallbacks *Callbacks = PP.getPPCallbacks())
+ Callbacks->PragmaMessage(MessageLoc, Namespace, Kind, MessageString);
+ }
+};
+
+/// Handle the clang \#pragma module import extension. The syntax is:
+/// \code
+/// #pragma clang module import some.module.name
+/// \endcode
+struct PragmaModuleImportHandler : public PragmaHandler {
+ PragmaModuleImportHandler() : PragmaHandler("import") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ SourceLocation ImportLoc = Tok.getLocation();
+
+ // Read the module name.
+ llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+ ModuleName;
+ if (LexModuleName(PP, Tok, ModuleName))
+ return;
+
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // If we have a non-empty module path, load the named module.
+ Module *Imported =
+ PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden,
+ /*IsInclusionDirective=*/false);
+ if (!Imported)
+ return;
+
+ PP.makeModuleVisible(Imported, ImportLoc);
+ PP.EnterAnnotationToken(SourceRange(ImportLoc, ModuleName.back().second),
+ tok::annot_module_include, Imported);
+ if (auto *CB = PP.getPPCallbacks())
+ CB->moduleImport(ImportLoc, ModuleName, Imported);
+ }
+};
+
+/// Handle the clang \#pragma module begin extension. The syntax is:
+/// \code
+/// #pragma clang module begin some.module.name
+/// ...
+/// #pragma clang module end
+/// \endcode
+struct PragmaModuleBeginHandler : public PragmaHandler {
+ PragmaModuleBeginHandler() : PragmaHandler("begin") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ SourceLocation BeginLoc = Tok.getLocation();
+
+ // Read the module name.
+ llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+ ModuleName;
+ if (LexModuleName(PP, Tok, ModuleName))
+ return;
+
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // We can only enter submodules of the current module.
+ StringRef Current = PP.getLangOpts().CurrentModule;
+ if (ModuleName.front().first->getName() != Current) {
+ PP.Diag(ModuleName.front().second, diag::err_pp_module_begin_wrong_module)
+ << ModuleName.front().first << (ModuleName.size() > 1)
+ << Current.empty() << Current;
+ return;
+ }
+
+ // Find the module we're entering. We require that a module map for it
+ // be loaded or implicitly loadable.
+ auto &HSI = PP.getHeaderSearchInfo();
+ Module *M = HSI.lookupModule(Current);
+ if (!M) {
+ PP.Diag(ModuleName.front().second,
+ diag::err_pp_module_begin_no_module_map) << Current;
+ return;
+ }
+ for (unsigned I = 1; I != ModuleName.size(); ++I) {
+ auto *NewM = M->findOrInferSubmodule(ModuleName[I].first->getName());
+ if (!NewM) {
+ PP.Diag(ModuleName[I].second, diag::err_pp_module_begin_no_submodule)
+ << M->getFullModuleName() << ModuleName[I].first;
+ return;
+ }
+ M = NewM;
+ }
+
+ // If the module isn't available, it doesn't make sense to enter it.
+ if (Preprocessor::checkModuleIsAvailable(
+ PP.getLangOpts(), PP.getTargetInfo(), PP.getDiagnostics(), M)) {
+ PP.Diag(BeginLoc, diag::note_pp_module_begin_here)
+ << M->getTopLevelModuleName();
+ return;
+ }
+
+ // Enter the scope of the submodule.
+ PP.EnterSubmodule(M, BeginLoc, /*ForPragma*/true);
+ PP.EnterAnnotationToken(SourceRange(BeginLoc, ModuleName.back().second),
+ tok::annot_module_begin, M);
+ }
+};
+
+/// Handle the clang \#pragma module end extension.
+struct PragmaModuleEndHandler : public PragmaHandler {
+ PragmaModuleEndHandler() : PragmaHandler("end") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ SourceLocation Loc = Tok.getLocation();
+
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ Module *M = PP.LeaveSubmodule(/*ForPragma*/true);
+ if (M)
+ PP.EnterAnnotationToken(SourceRange(Loc), tok::annot_module_end, M);
+ else
+ PP.Diag(Loc, diag::err_pp_module_end_without_module_begin);
+ }
+};
+
+/// Handle the clang \#pragma module build extension.
+struct PragmaModuleBuildHandler : public PragmaHandler {
+ PragmaModuleBuildHandler() : PragmaHandler("build") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ PP.HandlePragmaModuleBuild(Tok);
+ }
+};
+
+/// Handle the clang \#pragma module load extension.
+struct PragmaModuleLoadHandler : public PragmaHandler {
+ PragmaModuleLoadHandler() : PragmaHandler("load") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ SourceLocation Loc = Tok.getLocation();
+
+ // Read the module name.
+ llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+ ModuleName;
+ if (LexModuleName(PP, Tok, ModuleName))
+ return;
+
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // Load the module, don't make it visible.
+ PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden,
+ /*IsInclusionDirective=*/false);
+ }
+};
+
+/// PragmaPushMacroHandler - "\#pragma push_macro" saves the value of the
+/// macro on the top of the stack.
+struct PragmaPushMacroHandler : public PragmaHandler {
+ PragmaPushMacroHandler() : PragmaHandler("push_macro") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &PushMacroTok) override {
+ PP.HandlePragmaPushMacro(PushMacroTok);
+ }
+};
+
+/// PragmaPopMacroHandler - "\#pragma pop_macro" sets the value of the
+/// macro to the value on the top of the stack.
+struct PragmaPopMacroHandler : public PragmaHandler {
+ PragmaPopMacroHandler() : PragmaHandler("pop_macro") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &PopMacroTok) override {
+ PP.HandlePragmaPopMacro(PopMacroTok);
+ }
+};
+
+/// PragmaARCCFCodeAuditedHandler -
+/// \#pragma clang arc_cf_code_audited begin/end
+struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
+ PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &NameTok) override {
+ SourceLocation Loc = NameTok.getLocation();
+ bool IsBegin;
+
+ Token Tok;
+
+ // Lex the 'begin' or 'end'.
+ PP.LexUnexpandedToken(Tok);
+ const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+ if (BeginEnd && BeginEnd->isStr("begin")) {
+ IsBegin = true;
+ } else if (BeginEnd && BeginEnd->isStr("end")) {
+ IsBegin = false;
+ } else {
+ PP.Diag(Tok.getLocation(), diag::err_pp_arc_cf_code_audited_syntax);
+ return;
+ }
+
+ // Verify that this is followed by EOD.
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // The start location of the active audit.
+ SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedInfo().second;
+
+ // The start location we want after processing this.
+ SourceLocation NewLoc;
+
+ if (IsBegin) {
+ // Complain about attempts to re-enter an audit.
+ if (BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_double_begin_of_arc_cf_code_audited);
+ PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+ }
+ NewLoc = Loc;
+ } else {
+ // Complain about attempts to leave an audit that doesn't exist.
+ if (!BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_unmatched_end_of_arc_cf_code_audited);
+ return;
+ }
+ NewLoc = SourceLocation();
+ }
+
+ PP.setPragmaARCCFCodeAuditedInfo(NameTok.getIdentifierInfo(), NewLoc);
+ }
+};
+
+/// PragmaAssumeNonNullHandler -
+/// \#pragma clang assume_nonnull begin/end
+struct PragmaAssumeNonNullHandler : public PragmaHandler {
+ PragmaAssumeNonNullHandler() : PragmaHandler("assume_nonnull") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &NameTok) override {
+ SourceLocation Loc = NameTok.getLocation();
+ bool IsBegin;
+
+ Token Tok;
+
+ // Lex the 'begin' or 'end'.
+ PP.LexUnexpandedToken(Tok);
+ const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+ if (BeginEnd && BeginEnd->isStr("begin")) {
+ IsBegin = true;
+ } else if (BeginEnd && BeginEnd->isStr("end")) {
+ IsBegin = false;
+ } else {
+ PP.Diag(Tok.getLocation(), diag::err_pp_assume_nonnull_syntax);
+ return;
+ }
+
+ // Verify that this is followed by EOD.
+ PP.LexUnexpandedToken(Tok);
+ if (Tok.isNot(tok::eod))
+ PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+ // The start location of the active audit.
+ SourceLocation BeginLoc = PP.getPragmaAssumeNonNullLoc();
+
+ // The start location we want after processing this.
+ SourceLocation NewLoc;
+ PPCallbacks *Callbacks = PP.getPPCallbacks();
+
+ if (IsBegin) {
+ // Complain about attempts to re-enter an audit.
+ if (BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_double_begin_of_assume_nonnull);
+ PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+ }
+ NewLoc = Loc;
+ if (Callbacks)
+ Callbacks->PragmaAssumeNonNullBegin(NewLoc);
+ } else {
+ // Complain about attempts to leave an audit that doesn't exist.
+ if (!BeginLoc.isValid()) {
+ PP.Diag(Loc, diag::err_pp_unmatched_end_of_assume_nonnull);
+ return;
+ }
+ NewLoc = SourceLocation();
+ if (Callbacks)
+ Callbacks->PragmaAssumeNonNullEnd(NewLoc);
+ }
+
+ PP.setPragmaAssumeNonNullLoc(NewLoc);
+ }
+};
+
+/// Handle "\#pragma region [...]"
+///
+/// The syntax is
+/// \code
+/// #pragma region [optional name]
+/// #pragma endregion [optional comment]
+/// \endcode
+///
+/// \note This is
+/// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a>
+/// pragma, just skipped by compiler.
+struct PragmaRegionHandler : public PragmaHandler {
+ PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &NameTok) override {
+ // #pragma region: endregion matches can be verified
+ // __pragma(region): no sense, but ignored by msvc
+ // _Pragma is not valid for MSVC, but there isn't any point
+ // to handle a _Pragma differently.
+ }
+};
+
+} // namespace
+
+/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
+/// \#pragma GCC poison/system_header/dependency and \#pragma once.
+void Preprocessor::RegisterBuiltinPragmas() {
+ AddPragmaHandler(new PragmaOnceHandler());
+ AddPragmaHandler(new PragmaMarkHandler());
+ AddPragmaHandler(new PragmaPushMacroHandler());
+ AddPragmaHandler(new PragmaPopMacroHandler());
+ AddPragmaHandler(new PragmaMessageHandler(PPCallbacks::PMK_Message));
+
+ // #pragma GCC ...
+ AddPragmaHandler("GCC", new PragmaPoisonHandler());
+ AddPragmaHandler("GCC", new PragmaSystemHeaderHandler());
+ AddPragmaHandler("GCC", new PragmaDependencyHandler());
+ AddPragmaHandler("GCC", new PragmaDiagnosticHandler("GCC"));
+ AddPragmaHandler("GCC", new PragmaMessageHandler(PPCallbacks::PMK_Warning,
+ "GCC"));
+ AddPragmaHandler("GCC", new PragmaMessageHandler(PPCallbacks::PMK_Error,
+ "GCC"));
+ // #pragma clang ...
+ AddPragmaHandler("clang", new PragmaPoisonHandler());
+ AddPragmaHandler("clang", new PragmaSystemHeaderHandler());
+ AddPragmaHandler("clang", new PragmaDebugHandler());
+ AddPragmaHandler("clang", new PragmaDependencyHandler());
+ AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));
+ AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());
+ AddPragmaHandler("clang", new PragmaAssumeNonNullHandler());
+
+ // #pragma clang module ...
+ auto *ModuleHandler = new PragmaNamespace("module");
+ AddPragmaHandler("clang", ModuleHandler);
+ ModuleHandler->AddPragma(new PragmaModuleImportHandler());
+ ModuleHandler->AddPragma(new PragmaModuleBeginHandler());
+ ModuleHandler->AddPragma(new PragmaModuleEndHandler());
+ ModuleHandler->AddPragma(new PragmaModuleBuildHandler());
+ ModuleHandler->AddPragma(new PragmaModuleLoadHandler());
+
+ // Add region pragmas.
+ AddPragmaHandler(new PragmaRegionHandler("region"));
+ AddPragmaHandler(new PragmaRegionHandler("endregion"));
+
+ // MS extensions.
+ if (LangOpts.MicrosoftExt) {
+ AddPragmaHandler(new PragmaWarningHandler());
+ AddPragmaHandler(new PragmaExecCharsetHandler());
+ AddPragmaHandler(new PragmaIncludeAliasHandler());
+ AddPragmaHandler(new PragmaHdrstopHandler());
+ }
+
+ // Pragmas added by plugins
+ for (PragmaHandlerRegistry::iterator it = PragmaHandlerRegistry::begin(),
+ ie = PragmaHandlerRegistry::end();
+ it != ie; ++it) {
+ AddPragmaHandler(it->instantiate().release());
+ }
+}
+
+/// Ignore all pragmas, useful for modes such as -Eonly which would otherwise
+/// warn about those pragmas being unknown.
+void Preprocessor::IgnorePragmas() {
+ AddPragmaHandler(new EmptyPragmaHandler());
+ // Also ignore all pragmas in all namespaces created
+ // in Preprocessor::RegisterBuiltinPragmas().
+ AddPragmaHandler("GCC", new EmptyPragmaHandler());
+ AddPragmaHandler("clang", new EmptyPragmaHandler());
+}
diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp
new file mode 100644
index 000000000000..115256db4809
--- /dev/null
+++ b/clang/lib/Lex/PreprocessingRecord.cpp
@@ -0,0 +1,516 @@
+//===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PreprocessingRecord class, which maintains a record
+// of what occurred during preprocessing, and its helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+
+ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() =
+ default;
+
+InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
+ InclusionKind Kind, StringRef FileName,
+ bool InQuotes, bool ImportedModule,
+ const FileEntry *File, SourceRange Range)
+ : PreprocessingDirective(InclusionDirectiveKind, Range), InQuotes(InQuotes),
+ Kind(Kind), ImportedModule(ImportedModule), File(File) {
+ char *Memory = (char *)PPRec.Allocate(FileName.size() + 1, alignof(char));
+ memcpy(Memory, FileName.data(), FileName.size());
+ Memory[FileName.size()] = 0;
+ this->FileName = StringRef(Memory, FileName.size());
+}
+
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM) : SourceMgr(SM) {}
+
+/// Returns a pair of [Begin, End) iterators of preprocessed entities
+/// that source range \p Range encompasses.
+llvm::iterator_range<PreprocessingRecord::iterator>
+PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) {
+ if (Range.isInvalid())
+ return llvm::make_range(iterator(), iterator());
+
+ if (CachedRangeQuery.Range == Range) {
+ return llvm::make_range(iterator(this, CachedRangeQuery.Result.first),
+ iterator(this, CachedRangeQuery.Result.second));
+ }
+
+ std::pair<int, int> Res = getPreprocessedEntitiesInRangeSlow(Range);
+
+ CachedRangeQuery.Range = Range;
+ CachedRangeQuery.Result = Res;
+
+ return llvm::make_range(iterator(this, Res.first),
+ iterator(this, Res.second));
+}
+
+static bool isPreprocessedEntityIfInFileID(PreprocessedEntity *PPE, FileID FID,
+ SourceManager &SM) {
+ assert(FID.isValid());
+ if (!PPE)
+ return false;
+
+ SourceLocation Loc = PPE->getSourceRange().getBegin();
+ if (Loc.isInvalid())
+ return false;
+
+ return SM.isInFileID(SM.getFileLoc(Loc), FID);
+}
+
+/// Returns true if the preprocessed entity that \arg PPEI iterator
+/// points to is coming from the file \arg FID.
+///
+/// Can be used to avoid implicit deserializations of preallocated
+/// preprocessed entities if we only care about entities of a specific file
+/// and not from files \#included in the range given at
+/// \see getPreprocessedEntitiesInRange.
+bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
+ if (FID.isInvalid())
+ return false;
+
+ int Pos = std::distance(iterator(this, 0), PPEI);
+ if (Pos < 0) {
+ if (unsigned(-Pos-1) >= LoadedPreprocessedEntities.size()) {
+ assert(0 && "Out-of bounds loaded preprocessed entity");
+ return false;
+ }
+ assert(ExternalSource && "No external source to load from");
+ unsigned LoadedIndex = LoadedPreprocessedEntities.size()+Pos;
+ if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex])
+ return isPreprocessedEntityIfInFileID(PPE, FID, SourceMgr);
+
+ // See if the external source can see if the entity is in the file without
+ // deserializing it.
+ Optional<bool> IsInFile =
+ ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
+ if (IsInFile.hasValue())
+ return IsInFile.getValue();
+
+ // The external source did not provide a definite answer, go and deserialize
+ // the entity to check it.
+ return isPreprocessedEntityIfInFileID(
+ getLoadedPreprocessedEntity(LoadedIndex),
+ FID, SourceMgr);
+ }
+
+ if (unsigned(Pos) >= PreprocessedEntities.size()) {
+ assert(0 && "Out-of bounds local preprocessed entity");
+ return false;
+ }
+ return isPreprocessedEntityIfInFileID(PreprocessedEntities[Pos],
+ FID, SourceMgr);
+}
+
+/// Returns a pair of [Begin, End) iterators of preprocessed entities
+/// that source range \arg R encompasses.
+std::pair<int, int>
+PreprocessingRecord::getPreprocessedEntitiesInRangeSlow(SourceRange Range) {
+ assert(Range.isValid());
+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+ std::pair<unsigned, unsigned>
+ Local = findLocalPreprocessedEntitiesInRange(Range);
+
+ // Check if range spans local entities.
+ if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin()))
+ return std::make_pair(Local.first, Local.second);
+
+ std::pair<unsigned, unsigned>
+ Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range);
+
+ // Check if range spans local entities.
+ if (Loaded.first == Loaded.second)
+ return std::make_pair(Local.first, Local.second);
+
+ unsigned TotalLoaded = LoadedPreprocessedEntities.size();
+
+ // Check if range spans loaded entities.
+ if (Local.first == Local.second)
+ return std::make_pair(int(Loaded.first)-TotalLoaded,
+ int(Loaded.second)-TotalLoaded);
+
+ // Range spands loaded and local entities.
+ return std::make_pair(int(Loaded.first)-TotalLoaded, Local.second);
+}
+
+std::pair<unsigned, unsigned>
+PreprocessingRecord::findLocalPreprocessedEntitiesInRange(
+ SourceRange Range) const {
+ if (Range.isInvalid())
+ return std::make_pair(0,0);
+ assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+ unsigned Begin = findBeginLocalPreprocessedEntity(Range.getBegin());
+ unsigned End = findEndLocalPreprocessedEntity(Range.getEnd());
+ return std::make_pair(Begin, End);
+}
+
+namespace {
+
+template <SourceLocation (SourceRange::*getRangeLoc)() const>
+struct PPEntityComp {
+ const SourceManager &SM;
+
+ explicit PPEntityComp(const SourceManager &SM) : SM(SM) {}
+
+ bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const {
+ SourceLocation LHS = getLoc(L);
+ SourceLocation RHS = getLoc(R);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ bool operator()(PreprocessedEntity *L, SourceLocation RHS) const {
+ SourceLocation LHS = getLoc(L);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ bool operator()(SourceLocation LHS, PreprocessedEntity *R) const {
+ SourceLocation RHS = getLoc(R);
+ return SM.isBeforeInTranslationUnit(LHS, RHS);
+ }
+
+ SourceLocation getLoc(PreprocessedEntity *PPE) const {
+ SourceRange Range = PPE->getSourceRange();
+ return (Range.*getRangeLoc)();
+ }
+};
+
+} // namespace
+
+unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity(
+ SourceLocation Loc) const {
+ if (SourceMgr.isLoadedSourceLocation(Loc))
+ return 0;
+
+ size_t Count = PreprocessedEntities.size();
+ size_t Half;
+ std::vector<PreprocessedEntity *>::const_iterator
+ First = PreprocessedEntities.begin();
+ std::vector<PreprocessedEntity *>::const_iterator I;
+
+ // Do a binary search manually instead of using std::lower_bound because
+ // The end locations of entities may be unordered (when a macro expansion
+ // is inside another macro argument), but for this case it is not important
+ // whether we get the first macro expansion or its containing macro.
+ while (Count > 0) {
+ Half = Count/2;
+ I = First;
+ std::advance(I, Half);
+ if (SourceMgr.isBeforeInTranslationUnit((*I)->getSourceRange().getEnd(),
+ Loc)){
+ First = I;
+ ++First;
+ Count = Count - Half - 1;
+ } else
+ Count = Half;
+ }
+
+ return First - PreprocessedEntities.begin();
+}
+
+unsigned
+PreprocessingRecord::findEndLocalPreprocessedEntity(SourceLocation Loc) const {
+ if (SourceMgr.isLoadedSourceLocation(Loc))
+ return 0;
+
+ auto I = llvm::upper_bound(PreprocessedEntities, Loc,
+ PPEntityComp<&SourceRange::getBegin>(SourceMgr));
+ return I - PreprocessedEntities.begin();
+}
+
+PreprocessingRecord::PPEntityID
+PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
+ assert(Entity);
+ SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
+
+ if (isa<MacroDefinitionRecord>(Entity)) {
+ assert((PreprocessedEntities.empty() ||
+ !SourceMgr.isBeforeInTranslationUnit(
+ BeginLoc,
+ PreprocessedEntities.back()->getSourceRange().getBegin())) &&
+ "a macro definition was encountered out-of-order");
+ PreprocessedEntities.push_back(Entity);
+ return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
+ }
+
+ // Check normal case, this entity begin location is after the previous one.
+ if (PreprocessedEntities.empty() ||
+ !SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+ PreprocessedEntities.back()->getSourceRange().getBegin())) {
+ PreprocessedEntities.push_back(Entity);
+ return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
+ }
+
+ // The entity's location is not after the previous one; this can happen with
+ // include directives that form the filename using macros, e.g:
+ // "#include MACRO(STUFF)"
+ // or with macro expansions inside macro arguments where the arguments are
+ // not expanded in the same order as listed, e.g:
+ // \code
+ // #define M1 1
+ // #define M2 2
+ // #define FM(x,y) y x
+ // FM(M1, M2)
+ // \endcode
+
+ using pp_iter = std::vector<PreprocessedEntity *>::iterator;
+
+ // Usually there are few macro expansions when defining the filename, do a
+ // linear search for a few entities.
+ unsigned count = 0;
+ for (pp_iter RI = PreprocessedEntities.end(),
+ Begin = PreprocessedEntities.begin();
+ RI != Begin && count < 4; --RI, ++count) {
+ pp_iter I = RI;
+ --I;
+ if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+ (*I)->getSourceRange().getBegin())) {
+ pp_iter insertI = PreprocessedEntities.insert(RI, Entity);
+ return getPPEntityID(insertI - PreprocessedEntities.begin(),
+ /*isLoaded=*/false);
+ }
+ }
+
+ // Linear search unsuccessful. Do a binary search.
+ pp_iter I =
+ llvm::upper_bound(PreprocessedEntities, BeginLoc,
+ PPEntityComp<&SourceRange::getBegin>(SourceMgr));
+ pp_iter insertI = PreprocessedEntities.insert(I, Entity);
+ return getPPEntityID(insertI - PreprocessedEntities.begin(),
+ /*isLoaded=*/false);
+}
+
+void PreprocessingRecord::SetExternalSource(
+ ExternalPreprocessingRecordSource &Source) {
+ assert(!ExternalSource &&
+ "Preprocessing record already has an external source");
+ ExternalSource = &Source;
+}
+
+unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {
+ unsigned Result = LoadedPreprocessedEntities.size();
+ LoadedPreprocessedEntities.resize(LoadedPreprocessedEntities.size()
+ + NumEntities);
+ return Result;
+}
+
+unsigned PreprocessingRecord::allocateSkippedRanges(unsigned NumRanges) {
+ unsigned Result = SkippedRanges.size();
+ SkippedRanges.resize(SkippedRanges.size() + NumRanges);
+ SkippedRangesAllLoaded = false;
+ return Result;
+}
+
+void PreprocessingRecord::ensureSkippedRangesLoaded() {
+ if (SkippedRangesAllLoaded || !ExternalSource)
+ return;
+ for (unsigned Index = 0; Index != SkippedRanges.size(); ++Index) {
+ if (SkippedRanges[Index].isInvalid())
+ SkippedRanges[Index] = ExternalSource->ReadSkippedRange(Index);
+ }
+ SkippedRangesAllLoaded = true;
+}
+
+void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
+ MacroDefinitionRecord *Def) {
+ MacroDefinitions[Macro] = Def;
+}
+
+/// Retrieve the preprocessed entity at the given ID.
+PreprocessedEntity *PreprocessingRecord::getPreprocessedEntity(PPEntityID PPID){
+ if (PPID.ID < 0) {
+ unsigned Index = -PPID.ID - 1;
+ assert(Index < LoadedPreprocessedEntities.size() &&
+ "Out-of bounds loaded preprocessed entity");
+ return getLoadedPreprocessedEntity(Index);
+ }
+
+ if (PPID.ID == 0)
+ return nullptr;
+ unsigned Index = PPID.ID - 1;
+ assert(Index < PreprocessedEntities.size() &&
+ "Out-of bounds local preprocessed entity");
+ return PreprocessedEntities[Index];
+}
+
+/// Retrieve the loaded preprocessed entity at the given index.
+PreprocessedEntity *
+PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {
+ assert(Index < LoadedPreprocessedEntities.size() &&
+ "Out-of bounds loaded preprocessed entity");
+ assert(ExternalSource && "No external source to load from");
+ PreprocessedEntity *&Entity = LoadedPreprocessedEntities[Index];
+ if (!Entity) {
+ Entity = ExternalSource->ReadPreprocessedEntity(Index);
+ if (!Entity) // Failed to load.
+ Entity = new (*this)
+ PreprocessedEntity(PreprocessedEntity::InvalidKind, SourceRange());
+ }
+ return Entity;
+}
+
+MacroDefinitionRecord *
+PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
+ llvm::DenseMap<const MacroInfo *, MacroDefinitionRecord *>::iterator Pos =
+ MacroDefinitions.find(MI);
+ if (Pos == MacroDefinitions.end())
+ return nullptr;
+
+ return Pos->second;
+}
+
+void PreprocessingRecord::addMacroExpansion(const Token &Id,
+ const MacroInfo *MI,
+ SourceRange Range) {
+ // We don't record nested macro expansions.
+ if (Id.getLocation().isMacroID())
+ return;
+
+ if (MI->isBuiltinMacro())
+ addPreprocessedEntity(new (*this)
+ MacroExpansion(Id.getIdentifierInfo(), Range));
+ else if (MacroDefinitionRecord *Def = findMacroDefinition(MI))
+ addPreprocessedEntity(new (*this) MacroExpansion(Def, Range));
+}
+
+void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Defined(const Token &MacroNameTok,
+ const MacroDefinition &MD,
+ SourceRange Range) {
+ // This is not actually a macro expansion but record it as a macro reference.
+ if (MD)
+ addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+ MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::SourceRangeSkipped(SourceRange Range,
+ SourceLocation EndifLoc) {
+ assert(Range.isValid());
+ SkippedRanges.emplace_back(Range.getBegin(), EndifLoc);
+}
+
+void PreprocessingRecord::MacroExpands(const Token &Id,
+ const MacroDefinition &MD,
+ SourceRange Range,
+ const MacroArgs *Args) {
+ addMacroExpansion(Id, MD.getMacroInfo(), Range);
+}
+
+void PreprocessingRecord::MacroDefined(const Token &Id,
+ const MacroDirective *MD) {
+ const MacroInfo *MI = MD->getMacroInfo();
+ SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
+ MacroDefinitionRecord *Def =
+ new (*this) MacroDefinitionRecord(Id.getIdentifierInfo(), R);
+ addPreprocessedEntity(Def);
+ MacroDefinitions[MI] = Def;
+}
+
+void PreprocessingRecord::MacroUndefined(const Token &Id,
+ const MacroDefinition &MD,
+ const MacroDirective *Undef) {
+ MD.forAllDefinitions([&](MacroInfo *MI) { MacroDefinitions.erase(MI); });
+}
+
+void PreprocessingRecord::InclusionDirective(
+ SourceLocation HashLoc,
+ const Token &IncludeTok,
+ StringRef FileName,
+ bool IsAngled,
+ CharSourceRange FilenameRange,
+ const FileEntry *File,
+ StringRef SearchPath,
+ StringRef RelativePath,
+ const Module *Imported,
+ SrcMgr::CharacteristicKind FileType) {
+ InclusionDirective::InclusionKind Kind = InclusionDirective::Include;
+
+ switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_include:
+ Kind = InclusionDirective::Include;
+ break;
+
+ case tok::pp_import:
+ Kind = InclusionDirective::Import;
+ break;
+
+ case tok::pp_include_next:
+ Kind = InclusionDirective::IncludeNext;
+ break;
+
+ case tok::pp___include_macros:
+ Kind = InclusionDirective::IncludeMacros;
+ break;
+
+ default:
+ llvm_unreachable("Unknown include directive kind");
+ }
+
+ SourceLocation EndLoc;
+ if (!IsAngled) {
+ EndLoc = FilenameRange.getBegin();
+ } else {
+ EndLoc = FilenameRange.getEnd();
+ if (FilenameRange.isCharRange())
+ EndLoc = EndLoc.getLocWithOffset(-1); // the InclusionDirective expects
+ // a token range.
+ }
+ clang::InclusionDirective *ID =
+ new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,
+ (bool)Imported, File,
+ SourceRange(HashLoc, EndLoc));
+ addPreprocessedEntity(ID);
+}
+
+size_t PreprocessingRecord::getTotalMemory() const {
+ return BumpAlloc.getTotalMemory()
+ + llvm::capacity_in_bytes(MacroDefinitions)
+ + llvm::capacity_in_bytes(PreprocessedEntities)
+ + llvm::capacity_in_bytes(LoadedPreprocessedEntities)
+ + llvm::capacity_in_bytes(SkippedRanges);
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
new file mode 100644
index 000000000000..82007732a9b1
--- /dev/null
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -0,0 +1,1401 @@
+//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// Options to support:
+// -H - Print the name of each header file used.
+// -d[DNI] - Dump various things.
+// -fworking-directory - #line's with preprocessor's working dir.
+// -fpreprocessed
+// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
+// -W*
+// -w
+//
+// Messages to emit:
+// "Multiple include guards may be useful for:\n"
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemStatCache.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/CodeCompletionHandler.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/TokenLexer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+
+LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
+
+ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
+
+Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
+ DiagnosticsEngine &diags, LangOptions &opts,
+ SourceManager &SM, HeaderSearch &Headers,
+ ModuleLoader &TheModuleLoader,
+ IdentifierInfoLookup *IILookup, bool OwnsHeaders,
+ TranslationUnitKind TUKind)
+ : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
+ FileMgr(Headers.getFileMgr()), SourceMgr(SM),
+ ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
+ TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
+ // As the language options may have not been loaded yet (when
+ // deserializing an ASTUnit), adding keywords to the identifier table is
+ // deferred to Preprocessor::Initialize().
+ Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
+ TUKind(TUKind), SkipMainFilePreamble(0, true),
+ CurSubmoduleState(&NullSubmoduleState) {
+ OwnsHeaderSearch = OwnsHeaders;
+
+ // Default to discarding comments.
+ KeepComments = false;
+ KeepMacroComments = false;
+ SuppressIncludeNotFoundError = false;
+
+ // Macro expansion is enabled.
+ DisableMacroExpansion = false;
+ MacroExpansionInDirectivesOverride = false;
+ InMacroArgs = false;
+ ArgMacro = nullptr;
+ InMacroArgPreExpansion = false;
+ NumCachedTokenLexers = 0;
+ PragmasEnabled = true;
+ ParsingIfOrElifDirective = false;
+ PreprocessedOutput = false;
+
+ // We haven't read anything from the external source.
+ ReadMacrosFromExternalSource = false;
+
+ // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
+ // a macro. They get unpoisoned where it is allowed.
+ (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
+ SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
+ if (getLangOpts().CPlusPlus2a) {
+ (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
+ SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
+ } else {
+ Ident__VA_OPT__ = nullptr;
+ }
+
+ // Initialize the pragma handlers.
+ RegisterBuiltinPragmas();
+
+ // Initialize builtin macros like __LINE__ and friends.
+ RegisterBuiltinMacros();
+
+ if(LangOpts.Borland) {
+ Ident__exception_info = getIdentifierInfo("_exception_info");
+ Ident___exception_info = getIdentifierInfo("__exception_info");
+ Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
+ Ident__exception_code = getIdentifierInfo("_exception_code");
+ Ident___exception_code = getIdentifierInfo("__exception_code");
+ Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
+ Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
+ Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
+ Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
+ } else {
+ Ident__exception_info = Ident__exception_code = nullptr;
+ Ident__abnormal_termination = Ident___exception_info = nullptr;
+ Ident___exception_code = Ident___abnormal_termination = nullptr;
+ Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
+ Ident_AbnormalTermination = nullptr;
+ }
+
+ // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
+ if (usingPCHWithPragmaHdrStop())
+ SkippingUntilPragmaHdrStop = true;
+
+ // If using a PCH with a through header, start skipping tokens.
+ if (!this->PPOpts->PCHThroughHeader.empty() &&
+ !this->PPOpts->ImplicitPCHInclude.empty())
+ SkippingUntilPCHThroughHeader = true;
+
+ if (this->PPOpts->GeneratePreamble)
+ PreambleConditionalStack.startRecording();
+
+ ExcludedConditionalDirectiveSkipMappings =
+ this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
+ if (ExcludedConditionalDirectiveSkipMappings)
+ ExcludedConditionalDirectiveSkipMappings->clear();
+}
+
+Preprocessor::~Preprocessor() {
+ assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
+
+ IncludeMacroStack.clear();
+
+ // Destroy any macro definitions.
+ while (MacroInfoChain *I = MIChainHead) {
+ MIChainHead = I->Next;
+ I->~MacroInfoChain();
+ }
+
+ // Free any cached macro expanders.
+ // This populates MacroArgCache, so all TokenLexers need to be destroyed
+ // before the code below that frees up the MacroArgCache list.
+ std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
+ CurTokenLexer.reset();
+
+ // Free any cached MacroArgs.
+ for (MacroArgs *ArgList = MacroArgCache; ArgList;)
+ ArgList = ArgList->deallocate();
+
+ // Delete the header search info, if we own it.
+ if (OwnsHeaderSearch)
+ delete &HeaderInfo;
+}
+
+void Preprocessor::Initialize(const TargetInfo &Target,
+ const TargetInfo *AuxTarget) {
+ assert((!this->Target || this->Target == &Target) &&
+ "Invalid override of target information");
+ this->Target = &Target;
+
+ assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
+ "Invalid override of aux target information.");
+ this->AuxTarget = AuxTarget;
+
+ // Initialize information about built-ins.
+ BuiltinInfo.InitializeTarget(Target, AuxTarget);
+ HeaderInfo.setTarget(Target);
+
+ // Populate the identifier table with info about keywords for the current language.
+ Identifiers.AddKeywords(LangOpts);
+}
+
+void Preprocessor::InitializeForModelFile() {
+ NumEnteredSourceFiles = 0;
+
+ // Reset pragmas
+ PragmaHandlersBackup = std::move(PragmaHandlers);
+ PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
+ RegisterBuiltinPragmas();
+
+ // Reset PredefinesFileID
+ PredefinesFileID = FileID();
+}
+
+void Preprocessor::FinalizeForModelFile() {
+ NumEnteredSourceFiles = 1;
+
+ PragmaHandlers = std::move(PragmaHandlersBackup);
+}
+
+void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
+ llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
+ << getSpelling(Tok) << "'";
+
+ if (!DumpFlags) return;
+
+ llvm::errs() << "\t";
+ if (Tok.isAtStartOfLine())
+ llvm::errs() << " [StartOfLine]";
+ if (Tok.hasLeadingSpace())
+ llvm::errs() << " [LeadingSpace]";
+ if (Tok.isExpandDisabled())
+ llvm::errs() << " [ExpandDisabled]";
+ if (Tok.needsCleaning()) {
+ const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
+ llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
+ << "']";
+ }
+
+ llvm::errs() << "\tLoc=<";
+ DumpLocation(Tok.getLocation());
+ llvm::errs() << ">";
+}
+
+void Preprocessor::DumpLocation(SourceLocation Loc) const {
+ Loc.print(llvm::errs(), SourceMgr);
+}
+
+void Preprocessor::DumpMacro(const MacroInfo &MI) const {
+ llvm::errs() << "MACRO: ";
+ for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
+ DumpToken(MI.getReplacementToken(i));
+ llvm::errs() << " ";
+ }
+ llvm::errs() << "\n";
+}
+
+void Preprocessor::PrintStats() {
+ llvm::errs() << "\n*** Preprocessor Stats:\n";
+ llvm::errs() << NumDirectives << " directives found:\n";
+ llvm::errs() << " " << NumDefined << " #define.\n";
+ llvm::errs() << " " << NumUndefined << " #undef.\n";
+ llvm::errs() << " #include/#include_next/#import:\n";
+ llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
+ llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
+ llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
+ llvm::errs() << " " << NumElse << " #else/#elif.\n";
+ llvm::errs() << " " << NumEndif << " #endif.\n";
+ llvm::errs() << " " << NumPragma << " #pragma.\n";
+ llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
+
+ llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
+ << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
+ << NumFastMacroExpanded << " on the fast path.\n";
+ llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
+ << " token paste (##) operations performed, "
+ << NumFastTokenPaste << " on the fast path.\n";
+
+ llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
+
+ llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
+ llvm::errs() << "\n Macro Expanded Tokens: "
+ << llvm::capacity_in_bytes(MacroExpandedTokens);
+ llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
+ // FIXME: List information for all submodules.
+ llvm::errs() << "\n Macros: "
+ << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
+ llvm::errs() << "\n #pragma push_macro Info: "
+ << llvm::capacity_in_bytes(PragmaPushMacroInfo);
+ llvm::errs() << "\n Poison Reasons: "
+ << llvm::capacity_in_bytes(PoisonReasons);
+ llvm::errs() << "\n Comment Handlers: "
+ << llvm::capacity_in_bytes(CommentHandlers) << "\n";
+}
+
+Preprocessor::macro_iterator
+Preprocessor::macro_begin(bool IncludeExternalMacros) const {
+ if (IncludeExternalMacros && ExternalSource &&
+ !ReadMacrosFromExternalSource) {
+ ReadMacrosFromExternalSource = true;
+ ExternalSource->ReadDefinedMacros();
+ }
+
+ // Make sure we cover all macros in visible modules.
+ for (const ModuleMacro &Macro : ModuleMacros)
+ CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
+
+ return CurSubmoduleState->Macros.begin();
+}
+
+size_t Preprocessor::getTotalMemory() const {
+ return BP.getTotalMemory()
+ + llvm::capacity_in_bytes(MacroExpandedTokens)
+ + Predefines.capacity() /* Predefines buffer. */
+ // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
+ // and ModuleMacros.
+ + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
+ + llvm::capacity_in_bytes(PragmaPushMacroInfo)
+ + llvm::capacity_in_bytes(PoisonReasons)
+ + llvm::capacity_in_bytes(CommentHandlers);
+}
+
+Preprocessor::macro_iterator
+Preprocessor::macro_end(bool IncludeExternalMacros) const {
+ if (IncludeExternalMacros && ExternalSource &&
+ !ReadMacrosFromExternalSource) {
+ ReadMacrosFromExternalSource = true;
+ ExternalSource->ReadDefinedMacros();
+ }
+
+ return CurSubmoduleState->Macros.end();
+}
+
+/// Compares macro tokens with a specified token value sequence.
+static bool MacroDefinitionEquals(const MacroInfo *MI,
+ ArrayRef<TokenValue> Tokens) {
+ return Tokens.size() == MI->getNumTokens() &&
+ std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
+}
+
+StringRef Preprocessor::getLastMacroWithSpelling(
+ SourceLocation Loc,
+ ArrayRef<TokenValue> Tokens) const {
+ SourceLocation BestLocation;
+ StringRef BestSpelling;
+ for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
+ I != E; ++I) {
+ const MacroDirective::DefInfo
+ Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
+ if (!Def || !Def.getMacroInfo())
+ continue;
+ if (!Def.getMacroInfo()->isObjectLike())
+ continue;
+ if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
+ continue;
+ SourceLocation Location = Def.getLocation();
+ // Choose the macro defined latest.
+ if (BestLocation.isInvalid() ||
+ (Location.isValid() &&
+ SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
+ BestLocation = Location;
+ BestSpelling = I->first->getName();
+ }
+ }
+ return BestSpelling;
+}
+
+void Preprocessor::recomputeCurLexerKind() {
+ if (CurLexer)
+ CurLexerKind = CLK_Lexer;
+ else if (CurTokenLexer)
+ CurLexerKind = CLK_TokenLexer;
+ else
+ CurLexerKind = CLK_CachingLexer;
+}
+
+bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
+ unsigned CompleteLine,
+ unsigned CompleteColumn) {
+ assert(File);
+ assert(CompleteLine && CompleteColumn && "Starts from 1:1");
+ assert(!CodeCompletionFile && "Already set");
+
+ using llvm::MemoryBuffer;
+
+ // Load the actual file's contents.
+ bool Invalid = false;
+ const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
+ if (Invalid)
+ return true;
+
+ // Find the byte position of the truncation point.
+ const char *Position = Buffer->getBufferStart();
+ for (unsigned Line = 1; Line < CompleteLine; ++Line) {
+ for (; *Position; ++Position) {
+ if (*Position != '\r' && *Position != '\n')
+ continue;
+
+ // Eat \r\n or \n\r as a single line.
+ if ((Position[1] == '\r' || Position[1] == '\n') &&
+ Position[0] != Position[1])
+ ++Position;
+ ++Position;
+ break;
+ }
+ }
+
+ Position += CompleteColumn - 1;
+
+ // If pointing inside the preamble, adjust the position at the beginning of
+ // the file after the preamble.
+ if (SkipMainFilePreamble.first &&
+ SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
+ if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
+ Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
+ }
+
+ if (Position > Buffer->getBufferEnd())
+ Position = Buffer->getBufferEnd();
+
+ CodeCompletionFile = File;
+ CodeCompletionOffset = Position - Buffer->getBufferStart();
+
+ auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
+ Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
+ char *NewBuf = NewBuffer->getBufferStart();
+ char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
+ *NewPos = '\0';
+ std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
+ SourceMgr.overrideFileContents(File, std::move(NewBuffer));
+
+ return false;
+}
+
+void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
+ bool IsAngled) {
+ if (CodeComplete)
+ CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
+ setCodeCompletionReached();
+}
+
+void Preprocessor::CodeCompleteNaturalLanguage() {
+ if (CodeComplete)
+ CodeComplete->CodeCompleteNaturalLanguage();
+ setCodeCompletionReached();
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// SmallVector. Note that the returned StringRef may not point to the
+/// supplied buffer if a copy can be avoided.
+StringRef Preprocessor::getSpelling(const Token &Tok,
+ SmallVectorImpl<char> &Buffer,
+ bool *Invalid) const {
+ // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+ if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
+ // Try the fast path.
+ if (const IdentifierInfo *II = Tok.getIdentifierInfo())
+ return II->getName();
+ }
+
+ // Resize the buffer if we need to copy into it.
+ if (Tok.needsCleaning())
+ Buffer.resize(Tok.getLength());
+
+ const char *Ptr = Buffer.data();
+ unsigned Len = getSpelling(Tok, Ptr, Invalid);
+ return StringRef(Ptr, Len);
+}
+
+/// CreateString - Plop the specified string into a scratch buffer and return a
+/// location for it. If specified, the source location provides a source
+/// location for the token.
+void Preprocessor::CreateString(StringRef Str, Token &Tok,
+ SourceLocation ExpansionLocStart,
+ SourceLocation ExpansionLocEnd) {
+ Tok.setLength(Str.size());
+
+ const char *DestPtr;
+ SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
+
+ if (ExpansionLocStart.isValid())
+ Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
+ ExpansionLocEnd, Str.size());
+ Tok.setLocation(Loc);
+
+ // If this is a raw identifier or a literal token, set the pointer data.
+ if (Tok.is(tok::raw_identifier))
+ Tok.setRawIdentifierData(DestPtr);
+ else if (Tok.isLiteral())
+ Tok.setLiteralData(DestPtr);
+}
+
+SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
+ auto &SM = getSourceManager();
+ SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return SourceLocation();
+
+ // FIXME: We could consider re-using spelling for tokens we see repeatedly.
+ const char *DestPtr;
+ SourceLocation Spelling =
+ ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
+ return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
+}
+
+Module *Preprocessor::getCurrentModule() {
+ if (!getLangOpts().isCompilingModule())
+ return nullptr;
+
+ return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Initialization Methods
+//===----------------------------------------------------------------------===//
+
+/// EnterMainSourceFile - Enter the specified FileID as the main source file,
+/// which implicitly adds the builtin defines etc.
+void Preprocessor::EnterMainSourceFile() {
+ // We do not allow the preprocessor to reenter the main file. Doing so will
+ // cause FileID's to accumulate information from both runs (e.g. #line
+ // information) and predefined macros aren't guaranteed to be set properly.
+ assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
+ FileID MainFileID = SourceMgr.getMainFileID();
+
+ // If MainFileID is loaded it means we loaded an AST file, no need to enter
+ // a main file.
+ if (!SourceMgr.isLoadedFileID(MainFileID)) {
+ // Enter the main file source buffer.
+ EnterSourceFile(MainFileID, nullptr, SourceLocation());
+
+ // If we've been asked to skip bytes in the main file (e.g., as part of a
+ // precompiled preamble), do so now.
+ if (SkipMainFilePreamble.first > 0)
+ CurLexer->SetByteOffset(SkipMainFilePreamble.first,
+ SkipMainFilePreamble.second);
+
+ // Tell the header info that the main file was entered. If the file is later
+ // #imported, it won't be re-entered.
+ if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
+ HeaderInfo.IncrementIncludeCount(FE);
+ }
+
+ // Preprocess Predefines to populate the initial preprocessor state.
+ std::unique_ptr<llvm::MemoryBuffer> SB =
+ llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
+ assert(SB && "Cannot create predefined source buffer");
+ FileID FID = SourceMgr.createFileID(std::move(SB));
+ assert(FID.isValid() && "Could not create FileID for predefines?");
+ setPredefinesFileID(FID);
+
+ // Start parsing the predefines.
+ EnterSourceFile(FID, nullptr, SourceLocation());
+
+ if (!PPOpts->PCHThroughHeader.empty()) {
+ // Lookup and save the FileID for the through header. If it isn't found
+ // in the search path, it's a fatal error.
+ const DirectoryLookup *CurDir;
+ Optional<FileEntryRef> File = LookupFile(
+ SourceLocation(), PPOpts->PCHThroughHeader,
+ /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
+ /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
+ /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
+ /*IsFrameworkFound=*/nullptr);
+ if (!File) {
+ Diag(SourceLocation(), diag::err_pp_through_header_not_found)
+ << PPOpts->PCHThroughHeader;
+ return;
+ }
+ setPCHThroughHeaderFileID(
+ SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
+ }
+
+ // Skip tokens from the Predefines and if needed the main file.
+ if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
+ (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
+ SkipTokensWhileUsingPCH();
+}
+
+void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
+ assert(PCHThroughHeaderFileID.isInvalid() &&
+ "PCHThroughHeaderFileID already set!");
+ PCHThroughHeaderFileID = FID;
+}
+
+bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
+ assert(PCHThroughHeaderFileID.isValid() &&
+ "Invalid PCH through header FileID");
+ return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
+}
+
+bool Preprocessor::creatingPCHWithThroughHeader() {
+ return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
+ PCHThroughHeaderFileID.isValid();
+}
+
+bool Preprocessor::usingPCHWithThroughHeader() {
+ return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
+ PCHThroughHeaderFileID.isValid();
+}
+
+bool Preprocessor::creatingPCHWithPragmaHdrStop() {
+ return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
+}
+
+bool Preprocessor::usingPCHWithPragmaHdrStop() {
+ return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
+}
+
+/// Skip tokens until after the #include of the through header or
+/// until after a #pragma hdrstop is seen. Tokens in the predefines file
+/// and the main file may be skipped. If the end of the predefines file
+/// is reached, skipping continues into the main file. If the end of the
+/// main file is reached, it's a fatal error.
+void Preprocessor::SkipTokensWhileUsingPCH() {
+ bool ReachedMainFileEOF = false;
+ bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
+ bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
+ Token Tok;
+ while (true) {
+ bool InPredefines =
+ (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
+ switch (CurLexerKind) {
+ case CLK_Lexer:
+ CurLexer->Lex(Tok);
+ break;
+ case CLK_TokenLexer:
+ CurTokenLexer->Lex(Tok);
+ break;
+ case CLK_CachingLexer:
+ CachingLex(Tok);
+ break;
+ case CLK_LexAfterModuleImport:
+ LexAfterModuleImport(Tok);
+ break;
+ }
+ if (Tok.is(tok::eof) && !InPredefines) {
+ ReachedMainFileEOF = true;
+ break;
+ }
+ if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
+ break;
+ if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
+ break;
+ }
+ if (ReachedMainFileEOF) {
+ if (UsingPCHThroughHeader)
+ Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
+ << PPOpts->PCHThroughHeader << 1;
+ else if (!PPOpts->PCHWithHdrStopCreate)
+ Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
+ }
+}
+
+void Preprocessor::replayPreambleConditionalStack() {
+ // Restore the conditional stack from the preamble, if there is one.
+ if (PreambleConditionalStack.isReplaying()) {
+ assert(CurPPLexer &&
+ "CurPPLexer is null when calling replayPreambleConditionalStack.");
+ CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
+ PreambleConditionalStack.doneReplaying();
+ if (PreambleConditionalStack.reachedEOFWhileSkipping())
+ SkipExcludedConditionalBlock(
+ PreambleConditionalStack.SkipInfo->HashTokenLoc,
+ PreambleConditionalStack.SkipInfo->IfTokenLoc,
+ PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
+ PreambleConditionalStack.SkipInfo->FoundElse,
+ PreambleConditionalStack.SkipInfo->ElseLoc);
+ }
+}
+
+void Preprocessor::EndSourceFile() {
+ // Notify the client that we reached the end of the source file.
+ if (Callbacks)
+ Callbacks->EndOfMainFile();
+}
+
+//===----------------------------------------------------------------------===//
+// Lexer Event Handling.
+//===----------------------------------------------------------------------===//
+
+/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
+/// identifier information for the token and install it into the token,
+/// updating the token kind accordingly.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
+ assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
+
+ // Look up this token, see if it is a macro, or if it is a language keyword.
+ IdentifierInfo *II;
+ if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
+ // No cleaning needed, just use the characters from the lexed buffer.
+ II = getIdentifierInfo(Identifier.getRawIdentifier());
+ } else {
+ // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
+ SmallString<64> IdentifierBuffer;
+ StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
+
+ if (Identifier.hasUCN()) {
+ SmallString<64> UCNIdentifierBuffer;
+ expandUCNs(UCNIdentifierBuffer, CleanedStr);
+ II = getIdentifierInfo(UCNIdentifierBuffer);
+ } else {
+ II = getIdentifierInfo(CleanedStr);
+ }
+ }
+
+ // Update the token info (identifier info and appropriate token kind).
+ Identifier.setIdentifierInfo(II);
+ if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
+ getSourceManager().isInSystemHeader(Identifier.getLocation()))
+ Identifier.setKind(tok::identifier);
+ else
+ Identifier.setKind(II->getTokenID());
+
+ return II;
+}
+
+void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
+ PoisonReasons[II] = DiagID;
+}
+
+void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
+ assert(Ident__exception_code && Ident__exception_info);
+ assert(Ident___exception_code && Ident___exception_info);
+ Ident__exception_code->setIsPoisoned(Poison);
+ Ident___exception_code->setIsPoisoned(Poison);
+ Ident_GetExceptionCode->setIsPoisoned(Poison);
+ Ident__exception_info->setIsPoisoned(Poison);
+ Ident___exception_info->setIsPoisoned(Poison);
+ Ident_GetExceptionInfo->setIsPoisoned(Poison);
+ Ident__abnormal_termination->setIsPoisoned(Poison);
+ Ident___abnormal_termination->setIsPoisoned(Poison);
+ Ident_AbnormalTermination->setIsPoisoned(Poison);
+}
+
+void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
+ assert(Identifier.getIdentifierInfo() &&
+ "Can't handle identifiers without identifier info!");
+ llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
+ PoisonReasons.find(Identifier.getIdentifierInfo());
+ if(it == PoisonReasons.end())
+ Diag(Identifier, diag::err_pp_used_poisoned_id);
+ else
+ Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
+}
+
+/// Returns a diagnostic message kind for reporting a future keyword as
+/// appropriate for the identifier and specified language.
+static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
+ const LangOptions &LangOpts) {
+ assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
+
+ if (LangOpts.CPlusPlus)
+ return llvm::StringSwitch<diag::kind>(II.getName())
+#define CXX11_KEYWORD(NAME, FLAGS) \
+ .Case(#NAME, diag::warn_cxx11_keyword)
+#define CXX2A_KEYWORD(NAME, FLAGS) \
+ .Case(#NAME, diag::warn_cxx2a_keyword)
+#include "clang/Basic/TokenKinds.def"
+ ;
+
+ llvm_unreachable(
+ "Keyword not known to come from a newer Standard or proposed Standard");
+}
+
+void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
+ assert(II.isOutOfDate() && "not out of date");
+ getExternalSource()->updateOutOfDateIdentifier(II);
+}
+
+/// HandleIdentifier - This callback is invoked when the lexer reads an
+/// identifier. This callback looks up the identifier in the map and/or
+/// potentially macro expands it or turns it into a named token (like 'for').
+///
+/// Note that callers of this method are guarded by checking the
+/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
+/// IdentifierInfo methods that compute these properties will need to change to
+/// match.
+bool Preprocessor::HandleIdentifier(Token &Identifier) {
+ assert(Identifier.getIdentifierInfo() &&
+ "Can't handle identifiers without identifier info!");
+
+ IdentifierInfo &II = *Identifier.getIdentifierInfo();
+
+ // If the information about this identifier is out of date, update it from
+ // the external source.
+ // We have to treat __VA_ARGS__ in a special way, since it gets
+ // serialized with isPoisoned = true, but our preprocessor may have
+ // unpoisoned it if we're defining a C99 macro.
+ if (II.isOutOfDate()) {
+ bool CurrentIsPoisoned = false;
+ const bool IsSpecialVariadicMacro =
+ &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
+ if (IsSpecialVariadicMacro)
+ CurrentIsPoisoned = II.isPoisoned();
+
+ updateOutOfDateIdentifier(II);
+ Identifier.setKind(II.getTokenID());
+
+ if (IsSpecialVariadicMacro)
+ II.setIsPoisoned(CurrentIsPoisoned);
+ }
+
+ // If this identifier was poisoned, and if it was not produced from a macro
+ // expansion, emit an error.
+ if (II.isPoisoned() && CurPPLexer) {
+ HandlePoisonedIdentifier(Identifier);
+ }
+
+ // If this is a macro to be expanded, do it.
+ if (MacroDefinition MD = getMacroDefinition(&II)) {
+ auto *MI = MD.getMacroInfo();
+ assert(MI && "macro definition with no macro info?");
+ if (!DisableMacroExpansion) {
+ if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
+ // C99 6.10.3p10: If the preprocessing token immediately after the
+ // macro name isn't a '(', this macro should not be expanded.
+ if (!MI->isFunctionLike() || isNextPPTokenLParen())
+ return HandleMacroExpandedIdentifier(Identifier, MD);
+ } else {
+ // C99 6.10.3.4p2 says that a disabled macro may never again be
+ // expanded, even if it's in a context where it could be expanded in the
+ // future.
+ Identifier.setFlag(Token::DisableExpand);
+ if (MI->isObjectLike() || isNextPPTokenLParen())
+ Diag(Identifier, diag::pp_disabled_macro_expansion);
+ }
+ }
+ }
+
+ // If this identifier is a keyword in a newer Standard or proposed Standard,
+ // produce a warning. Don't warn if we're not considering macro expansion,
+ // since this identifier might be the name of a macro.
+ // FIXME: This warning is disabled in cases where it shouldn't be, like
+ // "#define constexpr constexpr", "int constexpr;"
+ if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
+ Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
+ << II.getName();
+ // Don't diagnose this keyword again in this translation unit.
+ II.setIsFutureCompatKeyword(false);
+ }
+
+ // If this is an extension token, diagnose its use.
+ // We avoid diagnosing tokens that originate from macro definitions.
+ // FIXME: This warning is disabled in cases where it shouldn't be,
+ // like "#define TY typeof", "TY(1) x".
+ if (II.isExtensionToken() && !DisableMacroExpansion)
+ Diag(Identifier, diag::ext_token_used);
+
+ // If this is the 'import' contextual keyword following an '@', note
+ // that the next token indicates a module name.
+ //
+ // Note that we do not treat 'import' as a contextual
+ // keyword when we're in a caching lexer, because caching lexers only get
+ // used in contexts where import declarations are disallowed.
+ //
+ // Likewise if this is the C++ Modules TS import keyword.
+ if (((LastTokenWasAt && II.isModulesImport()) ||
+ Identifier.is(tok::kw_import)) &&
+ !InMacroArgs && !DisableMacroExpansion &&
+ (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
+ CurLexerKind != CLK_CachingLexer) {
+ ModuleImportLoc = Identifier.getLocation();
+ ModuleImportPath.clear();
+ ModuleImportExpectsIdentifier = true;
+ CurLexerKind = CLK_LexAfterModuleImport;
+ }
+ return true;
+}
+
+void Preprocessor::Lex(Token &Result) {
+ ++LexLevel;
+
+ // We loop here until a lex function returns a token; this avoids recursion.
+ bool ReturnedToken;
+ do {
+ switch (CurLexerKind) {
+ case CLK_Lexer:
+ ReturnedToken = CurLexer->Lex(Result);
+ break;
+ case CLK_TokenLexer:
+ ReturnedToken = CurTokenLexer->Lex(Result);
+ break;
+ case CLK_CachingLexer:
+ CachingLex(Result);
+ ReturnedToken = true;
+ break;
+ case CLK_LexAfterModuleImport:
+ ReturnedToken = LexAfterModuleImport(Result);
+ break;
+ }
+ } while (!ReturnedToken);
+
+ if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
+ // Remember the identifier before code completion token.
+ setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
+ setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
+ // Set IdenfitierInfo to null to avoid confusing code that handles both
+ // identifiers and completion tokens.
+ Result.setIdentifierInfo(nullptr);
+ }
+
+ // Update ImportSeqState to track our position within a C++20 import-seq
+ // if this token is being produced as a result of phase 4 of translation.
+ if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
+ !Result.getFlag(Token::IsReinjected)) {
+ switch (Result.getKind()) {
+ case tok::l_paren: case tok::l_square: case tok::l_brace:
+ ImportSeqState.handleOpenBracket();
+ break;
+ case tok::r_paren: case tok::r_square:
+ ImportSeqState.handleCloseBracket();
+ break;
+ case tok::r_brace:
+ ImportSeqState.handleCloseBrace();
+ break;
+ case tok::semi:
+ ImportSeqState.handleSemi();
+ break;
+ case tok::header_name:
+ case tok::annot_header_unit:
+ ImportSeqState.handleHeaderName();
+ break;
+ case tok::kw_export:
+ ImportSeqState.handleExport();
+ break;
+ case tok::identifier:
+ if (Result.getIdentifierInfo()->isModulesImport()) {
+ ImportSeqState.handleImport();
+ if (ImportSeqState.afterImportSeq()) {
+ ModuleImportLoc = Result.getLocation();
+ ModuleImportPath.clear();
+ ModuleImportExpectsIdentifier = true;
+ CurLexerKind = CLK_LexAfterModuleImport;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ default:
+ ImportSeqState.handleMisc();
+ break;
+ }
+ }
+
+ LastTokenWasAt = Result.is(tok::at);
+ --LexLevel;
+ if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected))
+ OnToken(Result);
+}
+
+/// Lex a header-name token (including one formed from header-name-tokens if
+/// \p AllowConcatenation is \c true).
+///
+/// \param FilenameTok Filled in with the next token. On success, this will
+/// be either a header_name token. On failure, it will be whatever other
+/// token was found instead.
+/// \param AllowMacroExpansion If \c true, allow the header name to be formed
+/// by macro expansion (concatenating tokens as necessary if the first
+/// token is a '<').
+/// \return \c true if we reached EOD or EOF while looking for a > token in
+/// a concatenated header name and diagnosed it. \c false otherwise.
+bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
+ // Lex using header-name tokenization rules if tokens are being lexed from
+ // a file. Just grab a token normally if we're in a macro expansion.
+ if (CurPPLexer)
+ CurPPLexer->LexIncludeFilename(FilenameTok);
+ else
+ Lex(FilenameTok);
+
+ // This could be a <foo/bar.h> file coming from a macro expansion. In this
+ // case, glue the tokens together into an angle_string_literal token.
+ SmallString<128> FilenameBuffer;
+ if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
+ bool StartOfLine = FilenameTok.isAtStartOfLine();
+ bool LeadingSpace = FilenameTok.hasLeadingSpace();
+ bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
+
+ SourceLocation Start = FilenameTok.getLocation();
+ SourceLocation End;
+ FilenameBuffer.push_back('<');
+
+ // Consume tokens until we find a '>'.
+ // FIXME: A header-name could be formed starting or ending with an
+ // alternative token. It's not clear whether that's ill-formed in all
+ // cases.
+ while (FilenameTok.isNot(tok::greater)) {
+ Lex(FilenameTok);
+ if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
+ Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
+ Diag(Start, diag::note_matching) << tok::less;
+ return true;
+ }
+
+ End = FilenameTok.getLocation();
+
+ // FIXME: Provide code completion for #includes.
+ if (FilenameTok.is(tok::code_completion)) {
+ setCodeCompletionReached();
+ Lex(FilenameTok);
+ continue;
+ }
+
+ // Append the spelling of this token to the buffer. If there was a space
+ // before it, add it now.
+ if (FilenameTok.hasLeadingSpace())
+ FilenameBuffer.push_back(' ');
+
+ // Get the spelling of the token, directly into FilenameBuffer if
+ // possible.
+ size_t PreAppendSize = FilenameBuffer.size();
+ FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
+
+ const char *BufPtr = &FilenameBuffer[PreAppendSize];
+ unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
+
+ // If the token was spelled somewhere else, copy it into FilenameBuffer.
+ if (BufPtr != &FilenameBuffer[PreAppendSize])
+ memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
+
+ // Resize FilenameBuffer to the correct size.
+ if (FilenameTok.getLength() != ActualLen)
+ FilenameBuffer.resize(PreAppendSize + ActualLen);
+ }
+
+ FilenameTok.startToken();
+ FilenameTok.setKind(tok::header_name);
+ FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
+ FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
+ FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
+ CreateString(FilenameBuffer, FilenameTok, Start, End);
+ } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
+ // Convert a string-literal token of the form " h-char-sequence "
+ // (produced by macro expansion) into a header-name token.
+ //
+ // The rules for header-names don't quite match the rules for
+ // string-literals, but all the places where they differ result in
+ // undefined behavior, so we can and do treat them the same.
+ //
+ // A string-literal with a prefix or suffix is not translated into a
+ // header-name. This could theoretically be observable via the C++20
+ // context-sensitive header-name formation rules.
+ StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
+ if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
+ FilenameTok.setKind(tok::header_name);
+ }
+
+ return false;
+}
+
+/// Collect the tokens of a C++20 pp-import-suffix.
+void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
+ // FIXME: For error recovery, consider recognizing attribute syntax here
+ // and terminating / diagnosing a missing semicolon if we find anything
+ // else? (Can we leave that to the parser?)
+ unsigned BracketDepth = 0;
+ while (true) {
+ Toks.emplace_back();
+ Lex(Toks.back());
+
+ switch (Toks.back().getKind()) {
+ case tok::l_paren: case tok::l_square: case tok::l_brace:
+ ++BracketDepth;
+ break;
+
+ case tok::r_paren: case tok::r_square: case tok::r_brace:
+ if (BracketDepth == 0)
+ return;
+ --BracketDepth;
+ break;
+
+ case tok::semi:
+ if (BracketDepth == 0)
+ return;
+ break;
+
+ case tok::eof:
+ return;
+
+ default:
+ break;
+ }
+ }
+}
+
+
+/// Lex a token following the 'import' contextual keyword.
+///
+/// pp-import: [C++20]
+/// import header-name pp-import-suffix[opt] ;
+/// import header-name-tokens pp-import-suffix[opt] ;
+/// [ObjC] @ import module-name ;
+/// [Clang] import module-name ;
+///
+/// header-name-tokens:
+/// string-literal
+/// < [any sequence of preprocessing-tokens other than >] >
+///
+/// module-name:
+/// module-name-qualifier[opt] identifier
+///
+/// module-name-qualifier
+/// module-name-qualifier[opt] identifier .
+///
+/// We respond to a pp-import by importing macros from the named module.
+bool Preprocessor::LexAfterModuleImport(Token &Result) {
+ // Figure out what kind of lexer we actually have.
+ recomputeCurLexerKind();
+
+ // Lex the next token. The header-name lexing rules are used at the start of
+ // a pp-import.
+ //
+ // For now, we only support header-name imports in C++20 mode.
+ // FIXME: Should we allow this in all language modes that support an import
+ // declaration as an extension?
+ if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
+ if (LexHeaderName(Result))
+ return true;
+ } else {
+ Lex(Result);
+ }
+
+ // Allocate a holding buffer for a sequence of tokens and introduce it into
+ // the token stream.
+ auto EnterTokens = [this](ArrayRef<Token> Toks) {
+ auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+ std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+ EnterTokenStream(std::move(ToksCopy), Toks.size(),
+ /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+ };
+
+ // Check for a header-name.
+ SmallVector<Token, 32> Suffix;
+ if (Result.is(tok::header_name)) {
+ // Enter the header-name token into the token stream; a Lex action cannot
+ // both return a token and cache tokens (doing so would corrupt the token
+ // cache if the call to Lex comes from CachingLex / PeekAhead).
+ Suffix.push_back(Result);
+
+ // Consume the pp-import-suffix and expand any macros in it now. We'll add
+ // it back into the token stream later.
+ CollectPpImportSuffix(Suffix);
+ if (Suffix.back().isNot(tok::semi)) {
+ // This is not a pp-import after all.
+ EnterTokens(Suffix);
+ return false;
+ }
+
+ // C++2a [cpp.module]p1:
+ // The ';' preprocessing-token terminating a pp-import shall not have
+ // been produced by macro replacement.
+ SourceLocation SemiLoc = Suffix.back().getLocation();
+ if (SemiLoc.isMacroID())
+ Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+ // Reconstitute the import token.
+ Token ImportTok;
+ ImportTok.startToken();
+ ImportTok.setKind(tok::kw_import);
+ ImportTok.setLocation(ModuleImportLoc);
+ ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+ ImportTok.setLength(6);
+
+ auto Action = HandleHeaderIncludeOrImport(
+ /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
+ switch (Action.Kind) {
+ case ImportAction::None:
+ break;
+
+ case ImportAction::ModuleBegin:
+ // Let the parser know we're textually entering the module.
+ Suffix.emplace_back();
+ Suffix.back().startToken();
+ Suffix.back().setKind(tok::annot_module_begin);
+ Suffix.back().setLocation(SemiLoc);
+ Suffix.back().setAnnotationEndLoc(SemiLoc);
+ Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+ LLVM_FALLTHROUGH;
+
+ case ImportAction::ModuleImport:
+ case ImportAction::SkippedModuleImport:
+ // We chose to import (or textually enter) the file. Convert the
+ // header-name token into a header unit annotation token.
+ Suffix[0].setKind(tok::annot_header_unit);
+ Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+ Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+ // FIXME: Call the moduleImport callback?
+ break;
+ }
+
+ EnterTokens(Suffix);
+ return false;
+ }
+
+ // The token sequence
+ //
+ // import identifier (. identifier)*
+ //
+ // indicates a module import directive. We already saw the 'import'
+ // contextual keyword, so now we're looking for the identifiers.
+ if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
+ // We expected to see an identifier here, and we did; continue handling
+ // identifiers.
+ ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
+ Result.getLocation()));
+ ModuleImportExpectsIdentifier = false;
+ CurLexerKind = CLK_LexAfterModuleImport;
+ return true;
+ }
+
+ // If we're expecting a '.' or a ';', and we got a '.', then wait until we
+ // see the next identifier. (We can also see a '[[' that begins an
+ // attribute-specifier-seq here under the C++ Modules TS.)
+ if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
+ ModuleImportExpectsIdentifier = true;
+ CurLexerKind = CLK_LexAfterModuleImport;
+ return true;
+ }
+
+ // If we didn't recognize a module name at all, this is not a (valid) import.
+ if (ModuleImportPath.empty() || Result.is(tok::eof))
+ return true;
+
+ // Consume the pp-import-suffix and expand any macros in it now, if we're not
+ // at the semicolon already.
+ SourceLocation SemiLoc = Result.getLocation();
+ if (Result.isNot(tok::semi)) {
+ Suffix.push_back(Result);
+ CollectPpImportSuffix(Suffix);
+ if (Suffix.back().isNot(tok::semi)) {
+ // This is not an import after all.
+ EnterTokens(Suffix);
+ return false;
+ }
+ SemiLoc = Suffix.back().getLocation();
+ }
+
+ // Under the Modules TS, the dot is just part of the module name, and not
+ // a real hierarchy separator. Flatten such module names now.
+ //
+ // FIXME: Is this the right level to be performing this transformation?
+ std::string FlatModuleName;
+ if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
+ for (auto &Piece : ModuleImportPath) {
+ if (!FlatModuleName.empty())
+ FlatModuleName += ".";
+ FlatModuleName += Piece.first->getName();
+ }
+ SourceLocation FirstPathLoc = ModuleImportPath[0].second;
+ ModuleImportPath.clear();
+ ModuleImportPath.push_back(
+ std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+ }
+
+ Module *Imported = nullptr;
+ if (getLangOpts().Modules) {
+ Imported = TheModuleLoader.loadModule(ModuleImportLoc,
+ ModuleImportPath,
+ Module::Hidden,
+ /*IsInclusionDirective=*/false);
+ if (Imported)
+ makeModuleVisible(Imported, SemiLoc);
+ }
+ if (Callbacks)
+ Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
+
+ if (!Suffix.empty()) {
+ EnterTokens(Suffix);
+ return false;
+ }
+ return true;
+}
+
+void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
+ CurSubmoduleState->VisibleModules.setVisible(
+ M, Loc, [](Module *) {},
+ [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
+ // FIXME: Include the path in the diagnostic.
+ // FIXME: Include the import location for the conflicting module.
+ Diag(ModuleImportLoc, diag::warn_module_conflict)
+ << Path[0]->getFullModuleName()
+ << Conflict->getFullModuleName()
+ << Message;
+ });
+
+ // Add this module to the imports list of the currently-built submodule.
+ if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
+ BuildingSubmoduleStack.back().M->Imports.insert(M);
+}
+
+bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
+ const char *DiagnosticTag,
+ bool AllowMacroExpansion) {
+ // We need at least one string literal.
+ if (Result.isNot(tok::string_literal)) {
+ Diag(Result, diag::err_expected_string_literal)
+ << /*Source='in...'*/0 << DiagnosticTag;
+ return false;
+ }
+
+ // Lex string literal tokens, optionally with macro expansion.
+ SmallVector<Token, 4> StrToks;
+ do {
+ StrToks.push_back(Result);
+
+ if (Result.hasUDSuffix())
+ Diag(Result, diag::err_invalid_string_udl);
+
+ if (AllowMacroExpansion)
+ Lex(Result);
+ else
+ LexUnexpandedToken(Result);
+ } while (Result.is(tok::string_literal));
+
+ // Concatenate and parse the strings.
+ StringLiteralParser Literal(StrToks, *this);
+ assert(Literal.isAscii() && "Didn't allow wide strings in");
+
+ if (Literal.hadError)
+ return false;
+
+ if (Literal.Pascal) {
+ Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
+ << /*Source='in...'*/0 << DiagnosticTag;
+ return false;
+ }
+
+ String = Literal.GetString();
+ return true;
+}
+
+bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
+ assert(Tok.is(tok::numeric_constant));
+ SmallString<8> IntegerBuffer;
+ bool NumberInvalid = false;
+ StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
+ if (NumberInvalid)
+ return false;
+ NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
+ if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
+ return false;
+ llvm::APInt APVal(64, 0);
+ if (Literal.GetIntegerValue(APVal))
+ return false;
+ Lex(Tok);
+ Value = APVal.getLimitedValue();
+ return true;
+}
+
+void Preprocessor::addCommentHandler(CommentHandler *Handler) {
+ assert(Handler && "NULL comment handler");
+ assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
+ "Comment handler already registered");
+ CommentHandlers.push_back(Handler);
+}
+
+void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
+ std::vector<CommentHandler *>::iterator Pos =
+ llvm::find(CommentHandlers, Handler);
+ assert(Pos != CommentHandlers.end() && "Comment handler not registered");
+ CommentHandlers.erase(Pos);
+}
+
+bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
+ bool AnyPendingTokens = false;
+ for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
+ HEnd = CommentHandlers.end();
+ H != HEnd; ++H) {
+ if ((*H)->HandleComment(*this, Comment))
+ AnyPendingTokens = true;
+ }
+ if (!AnyPendingTokens || getCommentRetentionState())
+ return false;
+ Lex(result);
+ return true;
+}
+
+ModuleLoader::~ModuleLoader() = default;
+
+CommentHandler::~CommentHandler() = default;
+
+CodeCompletionHandler::~CodeCompletionHandler() = default;
+
+void Preprocessor::createPreprocessingRecord() {
+ if (Record)
+ return;
+
+ Record = new PreprocessingRecord(getSourceManager());
+ addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
+}
diff --git a/clang/lib/Lex/PreprocessorLexer.cpp b/clang/lib/Lex/PreprocessorLexer.cpp
new file mode 100644
index 000000000000..5f6f4a13419b
--- /dev/null
+++ b/clang/lib/Lex/PreprocessorLexer.cpp
@@ -0,0 +1,52 @@
+//===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PreprocessorLexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include <cassert>
+
+using namespace clang;
+
+void PreprocessorLexer::anchor() {}
+
+PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)
+ : PP(pp), FID(fid) {
+ if (pp)
+ InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size();
+}
+
+/// After the preprocessor has parsed a \#include, lex and
+/// (potentially) macro expand the filename.
+void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
+ assert(ParsingFilename == false && "reentered LexIncludeFilename");
+
+ // We are now parsing a filename!
+ ParsingFilename = true;
+
+ // Lex the filename.
+ if (LexingRawMode)
+ IndirectLex(FilenameTok);
+ else
+ PP->Lex(FilenameTok);
+
+ // We should have obtained the filename now.
+ ParsingFilename = false;
+}
+
+/// getFileEntry - Return the FileEntry corresponding to this FileID. Like
+/// getFileID(), this only works for lexers with attached preprocessors.
+const FileEntry *PreprocessorLexer::getFileEntry() const {
+ return PP->getSourceManager().getFileEntryForID(getFileID());
+}
diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp
new file mode 100644
index 000000000000..19ab93ec54b4
--- /dev/null
+++ b/clang/lib/Lex/ScratchBuffer.cpp
@@ -0,0 +1,83 @@
+//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScratchBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+using namespace clang;
+
+// ScratchBufSize - The size of each chunk of scratch memory. Slightly less
+//than a page, almost certainly enough for anything. :)
+static const unsigned ScratchBufSize = 4060;
+
+ScratchBuffer::ScratchBuffer(SourceManager &SM)
+ : SourceMgr(SM), CurBuffer(nullptr) {
+ // Set BytesUsed so that the first call to getToken will require an alloc.
+ BytesUsed = ScratchBufSize;
+}
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token. This is just like the
+/// method below, but returns a location that indicates the physloc of the
+/// token.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+ const char *&DestPtr) {
+ if (BytesUsed+Len+2 > ScratchBufSize)
+ AllocScratchBuffer(Len+2);
+ else {
+ // Clear out the source line cache if it's already been computed.
+ // FIXME: Allow this to be incrementally extended.
+ auto *ContentCache = const_cast<SrcMgr::ContentCache *>(
+ SourceMgr.getSLocEntry(SourceMgr.getFileID(BufferStartLoc))
+ .getFile().getContentCache());
+ ContentCache->SourceLineCache = nullptr;
+ }
+
+ // Prefix the token with a \n, so that it looks like it is the first thing on
+ // its own virtual line in caret diagnostics.
+ CurBuffer[BytesUsed++] = '\n';
+
+ // Return a pointer to the character data.
+ DestPtr = CurBuffer+BytesUsed;
+
+ // Copy the token data into the buffer.
+ memcpy(CurBuffer+BytesUsed, Buf, Len);
+
+ // Remember that we used these bytes.
+ BytesUsed += Len+1;
+
+ // Add a NUL terminator to the token. This keeps the tokens separated, in
+ // case they get relexed, and puts them on their own virtual lines in case a
+ // diagnostic points to one.
+ CurBuffer[BytesUsed-1] = '\0';
+
+ return BufferStartLoc.getLocWithOffset(BytesUsed-Len-1);
+}
+
+void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
+ // Only pay attention to the requested length if it is larger than our default
+ // page size. If it is, we allocate an entire chunk for it. This is to
+ // support gigantic tokens, which almost certainly won't happen. :)
+ if (RequestLen < ScratchBufSize)
+ RequestLen = ScratchBufSize;
+
+ // Get scratch buffer. Zero-initialize it so it can be dumped into a PCH file
+ // deterministically.
+ std::unique_ptr<llvm::WritableMemoryBuffer> OwnBuf =
+ llvm::WritableMemoryBuffer::getNewMemBuffer(RequestLen,
+ "<scratch space>");
+ CurBuffer = OwnBuf->getBufferStart();
+ FileID FID = SourceMgr.createFileID(std::move(OwnBuf));
+ BufferStartLoc = SourceMgr.getLocForStartOfFile(FID);
+ BytesUsed = 0;
+}
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
new file mode 100644
index 000000000000..e626cfcc927f
--- /dev/null
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -0,0 +1,297 @@
+//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenConcatenation class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenConcatenation.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace clang;
+
+
+/// IsStringPrefix - Return true if Str is a string prefix.
+/// 'L', 'u', 'U', or 'u8'. Including raw versions.
+static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) {
+
+ if (Str[0] == 'L' ||
+ (CPlusPlus11 && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+
+ if (Str.size() == 1)
+ return true; // "L", "u", "U", and "R"
+
+ // Check for raw flavors. Need to make sure the first character wasn't
+ // already R. Need CPlusPlus11 check for "LR".
+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11)
+ return true; // "LR", "uR", "UR"
+
+ // Check for "u8" and "u8R"
+ if (Str[0] == 'u' && Str[1] == '8') {
+ if (Str.size() == 2) return true; // "u8"
+ if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
+ }
+ }
+
+ return false;
+}
+
+/// IsIdentifierStringPrefix - Return true if the spelling of the token
+/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
+ const LangOptions &LangOpts = PP.getLangOpts();
+
+ if (!Tok.needsCleaning()) {
+ if (Tok.getLength() < 1 || Tok.getLength() > 3)
+ return false;
+ SourceManager &SM = PP.getSourceManager();
+ const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+ return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
+ LangOpts.CPlusPlus11);
+ }
+
+ if (Tok.getLength() < 256) {
+ char Buffer[256];
+ const char *TokPtr = Buffer;
+ unsigned length = PP.getSpelling(Tok, TokPtr);
+ return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11);
+ }
+
+ return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11);
+}
+
+TokenConcatenation::TokenConcatenation(const Preprocessor &pp) : PP(pp) {
+ memset(TokenInfo, 0, sizeof(TokenInfo));
+
+ // These tokens have custom code in AvoidConcat.
+ TokenInfo[tok::identifier ] |= aci_custom;
+ TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
+ TokenInfo[tok::period ] |= aci_custom_firstchar;
+ TokenInfo[tok::amp ] |= aci_custom_firstchar;
+ TokenInfo[tok::plus ] |= aci_custom_firstchar;
+ TokenInfo[tok::minus ] |= aci_custom_firstchar;
+ TokenInfo[tok::slash ] |= aci_custom_firstchar;
+ TokenInfo[tok::less ] |= aci_custom_firstchar;
+ TokenInfo[tok::greater ] |= aci_custom_firstchar;
+ TokenInfo[tok::pipe ] |= aci_custom_firstchar;
+ TokenInfo[tok::percent ] |= aci_custom_firstchar;
+ TokenInfo[tok::colon ] |= aci_custom_firstchar;
+ TokenInfo[tok::hash ] |= aci_custom_firstchar;
+ TokenInfo[tok::arrow ] |= aci_custom_firstchar;
+
+ // These tokens have custom code in C++11 mode.
+ if (PP.getLangOpts().CPlusPlus11) {
+ TokenInfo[tok::string_literal ] |= aci_custom;
+ TokenInfo[tok::wide_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf16_string_literal] |= aci_custom;
+ TokenInfo[tok::utf32_string_literal] |= aci_custom;
+ TokenInfo[tok::char_constant ] |= aci_custom;
+ TokenInfo[tok::wide_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+ }
+
+ // These tokens have custom code in C++17 mode.
+ if (PP.getLangOpts().CPlusPlus17)
+ TokenInfo[tok::utf8_char_constant] |= aci_custom;
+
+ // These tokens have custom code in C++2a mode.
+ if (PP.getLangOpts().CPlusPlus2a)
+ TokenInfo[tok::lessequal ] |= aci_custom_firstchar;
+
+ // These tokens change behavior if followed by an '='.
+ TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
+ TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
+ TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
+ TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
+ TokenInfo[tok::less ] |= aci_avoid_equal; // <=
+ TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
+ TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
+ TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
+ TokenInfo[tok::star ] |= aci_avoid_equal; // *=
+ TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
+ TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
+ TokenInfo[tok::greatergreater] |= aci_avoid_equal; // >>=
+ TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
+ TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
+}
+
+/// GetFirstChar - Get the first character of the token \arg Tok,
+/// avoiding calls to getSpelling where possible.
+static char GetFirstChar(const Preprocessor &PP, const Token &Tok) {
+ if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+ // Avoid spelling identifiers, the most common form of token.
+ return II->getNameStart()[0];
+ } else if (!Tok.needsCleaning()) {
+ if (Tok.isLiteral() && Tok.getLiteralData()) {
+ return *Tok.getLiteralData();
+ } else {
+ SourceManager &SM = PP.getSourceManager();
+ return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+ }
+ } else if (Tok.getLength() < 256) {
+ char Buffer[256];
+ const char *TokPtr = Buffer;
+ PP.getSpelling(Tok, TokPtr);
+ return TokPtr[0];
+ } else {
+ return PP.getSpelling(Tok)[0];
+ }
+}
+
+/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
+/// the two individual tokens to be lexed as a single token, return true
+/// (which causes a space to be printed between them). This allows the output
+/// of -E mode to be lexed to the same token stream as lexing the input
+/// directly would.
+///
+/// This code must conservatively return true if it doesn't want to be 100%
+/// accurate. This will cause the output to include extra space characters,
+/// but the resulting output won't have incorrect concatenations going on.
+/// Examples include "..", which we print with a space between, because we
+/// don't want to track enough to tell "x.." from "...".
+bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
+ const Token &PrevTok,
+ const Token &Tok) const {
+ // Conservatively assume that every annotation token that has a printable
+ // form requires whitespace.
+ if (PrevTok.isAnnotation())
+ return true;
+
+ // First, check to see if the tokens were directly adjacent in the original
+ // source. If they were, it must be okay to stick them together: if there
+ // were an issue, the tokens would have been lexed differently.
+ SourceManager &SM = PP.getSourceManager();
+ SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation());
+ SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation());
+ if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc)
+ return false;
+
+ tok::TokenKind PrevKind = PrevTok.getKind();
+ if (!PrevTok.isAnnotation() && PrevTok.getIdentifierInfo())
+ PrevKind = tok::identifier; // Language keyword or named operator.
+
+ // Look up information on when we should avoid concatenation with prevtok.
+ unsigned ConcatInfo = TokenInfo[PrevKind];
+
+ // If prevtok never causes a problem for anything after it, return quickly.
+ if (ConcatInfo == 0) return false;
+
+ if (ConcatInfo & aci_avoid_equal) {
+ // If the next token is '=' or '==', avoid concatenation.
+ if (Tok.isOneOf(tok::equal, tok::equalequal))
+ return true;
+ ConcatInfo &= ~aci_avoid_equal;
+ }
+ if (Tok.isAnnotation()) {
+ // Modules annotation can show up when generated automatically for includes.
+ assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
+ tok::annot_module_end) &&
+ "unexpected annotation in AvoidConcat");
+ ConcatInfo = 0;
+ }
+
+ if (ConcatInfo == 0)
+ return false;
+
+ // Basic algorithm: we look at the first character of the second token, and
+ // determine whether it, if appended to the first token, would form (or
+ // would contribute) to a larger token if concatenated.
+ char FirstChar = 0;
+ if (ConcatInfo & aci_custom) {
+ // If the token does not need to know the first character, don't get it.
+ } else {
+ FirstChar = GetFirstChar(PP, Tok);
+ }
+
+ switch (PrevKind) {
+ default:
+ llvm_unreachable("InitAvoidConcatTokenInfo built wrong");
+
+ case tok::raw_identifier:
+ llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
+
+ case tok::string_literal:
+ case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
+ case tok::char_constant:
+ case tok::wide_char_constant:
+ case tok::utf8_char_constant:
+ case tok::utf16_char_constant:
+ case tok::utf32_char_constant:
+ if (!PP.getLangOpts().CPlusPlus11)
+ return false;
+
+ // In C++11, a string or character literal followed by an identifier is a
+ // single token.
+ if (Tok.getIdentifierInfo())
+ return true;
+
+ // A ud-suffix is an identifier. If the previous token ends with one, treat
+ // it as an identifier.
+ if (!PrevTok.hasUDSuffix())
+ return false;
+ LLVM_FALLTHROUGH;
+ case tok::identifier: // id+id or id+number or id+L"foo".
+ // id+'.'... will not append.
+ if (Tok.is(tok::numeric_constant))
+ return GetFirstChar(PP, Tok) != '.';
+
+ if (Tok.getIdentifierInfo() ||
+ Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal,
+ tok::utf16_string_literal, tok::utf32_string_literal,
+ tok::wide_char_constant, tok::utf8_char_constant,
+ tok::utf16_char_constant, tok::utf32_char_constant))
+ return true;
+
+ // If this isn't identifier + string, we're done.
+ if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
+ return false;
+
+ // Otherwise, this is a narrow character or string. If the *identifier*
+ // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
+ return IsIdentifierStringPrefix(PrevTok);
+
+ case tok::numeric_constant:
+ return isPreprocessingNumberBody(FirstChar) ||
+ FirstChar == '+' || FirstChar == '-';
+ case tok::period: // ..., .*, .1234
+ return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
+ isDigit(FirstChar) ||
+ (PP.getLangOpts().CPlusPlus && FirstChar == '*');
+ case tok::amp: // &&
+ return FirstChar == '&';
+ case tok::plus: // ++
+ return FirstChar == '+';
+ case tok::minus: // --, ->, ->*
+ return FirstChar == '-' || FirstChar == '>';
+ case tok::slash: //, /*, //
+ return FirstChar == '*' || FirstChar == '/';
+ case tok::less: // <<, <<=, <:, <%
+ return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
+ case tok::greater: // >>, >>=
+ return FirstChar == '>';
+ case tok::pipe: // ||
+ return FirstChar == '|';
+ case tok::percent: // %>, %:
+ return FirstChar == '>' || FirstChar == ':';
+ case tok::colon: // ::, :>
+ return FirstChar == '>' ||
+ (PP.getLangOpts().CPlusPlus && FirstChar == ':');
+ case tok::hash: // ##, #@, %:%:
+ return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
+ case tok::arrow: // ->*
+ return PP.getLangOpts().CPlusPlus && FirstChar == '*';
+ case tok::lessequal: // <=> (C++2a)
+ return PP.getLangOpts().CPlusPlus2a && FirstChar == '>';
+ }
+}
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
new file mode 100644
index 000000000000..da5681aaf478
--- /dev/null
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -0,0 +1,1079 @@
+//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenLexer.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/VariadicMacroSupport.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstring>
+
+using namespace clang;
+
+/// Create a TokenLexer for the specified macro with the specified actual
+/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
+void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
+ MacroArgs *Actuals) {
+ // If the client is reusing a TokenLexer, make sure to free any memory
+ // associated with it.
+ destroy();
+
+ Macro = MI;
+ ActualArgs = Actuals;
+ CurTokenIdx = 0;
+
+ ExpandLocStart = Tok.getLocation();
+ ExpandLocEnd = ELEnd;
+ AtStartOfLine = Tok.isAtStartOfLine();
+ HasLeadingSpace = Tok.hasLeadingSpace();
+ NextTokGetsSpace = false;
+ Tokens = &*Macro->tokens_begin();
+ OwnsTokens = false;
+ DisableMacroExpansion = false;
+ IsReinject = false;
+ NumTokens = Macro->tokens_end()-Macro->tokens_begin();
+ MacroExpansionStart = SourceLocation();
+
+ SourceManager &SM = PP.getSourceManager();
+ MacroStartSLocOffset = SM.getNextLocalOffset();
+
+ if (NumTokens > 0) {
+ assert(Tokens[0].getLocation().isValid());
+ assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
+ "Macro defined in macro?");
+ assert(ExpandLocStart.isValid());
+
+ // Reserve a source location entry chunk for the length of the macro
+ // definition. Tokens that get lexed directly from the definition will
+ // have their locations pointing inside this chunk. This is to avoid
+ // creating separate source location entries for each token.
+ MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
+ MacroDefLength = Macro->getDefinitionLength(SM);
+ MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
+ ExpandLocStart,
+ ExpandLocEnd,
+ MacroDefLength);
+ }
+
+ // If this is a function-like macro, expand the arguments and change
+ // Tokens to point to the expanded tokens.
+ if (Macro->isFunctionLike() && Macro->getNumParams())
+ ExpandFunctionArguments();
+
+ // Mark the macro as currently disabled, so that it is not recursively
+ // expanded. The macro must be disabled only after argument pre-expansion of
+ // function-like macro arguments occurs.
+ Macro->DisableMacro();
+}
+
+/// Create a TokenLexer for the specified token stream. This does not
+/// take ownership of the specified token vector.
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+ bool disableMacroExpansion, bool ownsTokens,
+ bool isReinject) {
+ assert(!isReinject || disableMacroExpansion);
+ // If the client is reusing a TokenLexer, make sure to free any memory
+ // associated with it.
+ destroy();
+
+ Macro = nullptr;
+ ActualArgs = nullptr;
+ Tokens = TokArray;
+ OwnsTokens = ownsTokens;
+ DisableMacroExpansion = disableMacroExpansion;
+ IsReinject = isReinject;
+ NumTokens = NumToks;
+ CurTokenIdx = 0;
+ ExpandLocStart = ExpandLocEnd = SourceLocation();
+ AtStartOfLine = false;
+ HasLeadingSpace = false;
+ NextTokGetsSpace = false;
+ MacroExpansionStart = SourceLocation();
+
+ // Set HasLeadingSpace/AtStartOfLine so that the first token will be
+ // returned unmodified.
+ if (NumToks != 0) {
+ AtStartOfLine = TokArray[0].isAtStartOfLine();
+ HasLeadingSpace = TokArray[0].hasLeadingSpace();
+ }
+}
+
+void TokenLexer::destroy() {
+ // If this was a function-like macro that actually uses its arguments, delete
+ // the expanded tokens.
+ if (OwnsTokens) {
+ delete [] Tokens;
+ Tokens = nullptr;
+ OwnsTokens = false;
+ }
+
+ // TokenLexer owns its formal arguments.
+ if (ActualArgs) ActualArgs->destroy(PP);
+}
+
+bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
+ SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
+ unsigned MacroArgNo, Preprocessor &PP) {
+ // Is the macro argument __VA_ARGS__?
+ if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
+ return false;
+
+ // In Microsoft-compatibility mode, a comma is removed in the expansion
+ // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
+ // not supported by gcc.
+ if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
+ return false;
+
+ // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
+ // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
+ // named arguments, where it remains. In all other modes, including C99
+ // with GNU extensions, it is removed regardless of named arguments.
+ // Microsoft also appears to support this extension, unofficially.
+ if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
+ && Macro->getNumParams() < 2)
+ return false;
+
+ // Is a comma available to be removed?
+ if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
+ return false;
+
+ // Issue an extension diagnostic for the paste operator.
+ if (HasPasteOperator)
+ PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
+
+ // Remove the comma.
+ ResultToks.pop_back();
+
+ if (!ResultToks.empty()) {
+ // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
+ // then removal of the comma should produce a placemarker token (in C99
+ // terms) which we model by popping off the previous ##, giving us a plain
+ // "X" when __VA_ARGS__ is empty.
+ if (ResultToks.back().is(tok::hashhash))
+ ResultToks.pop_back();
+
+ // Remember that this comma was elided.
+ ResultToks.back().setFlag(Token::CommaAfterElided);
+ }
+
+ // Never add a space, even if the comma, ##, or arg had a space.
+ NextTokGetsSpace = false;
+ return true;
+}
+
+void TokenLexer::stringifyVAOPTContents(
+ SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
+ const SourceLocation VAOPTClosingParenLoc) {
+ const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
+ const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
+ Token *const VAOPTTokens =
+ NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;
+
+ SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
+ // FIXME: Should we keep track within VCtx that we did or didnot
+ // encounter pasting - and only then perform this loop.
+
+ // Perform token pasting (concatenation) prior to stringization.
+ for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
+ ++CurTokenIdx) {
+ if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
+ assert(CurTokenIdx != 0 &&
+ "Can not have __VAOPT__ contents begin with a ##");
+ Token &LHS = VAOPTTokens[CurTokenIdx - 1];
+ pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens),
+ CurTokenIdx);
+ // Replace the token prior to the first ## in this iteration.
+ ConcatenatedVAOPTResultToks.back() = LHS;
+ if (CurTokenIdx == NumVAOptTokens)
+ break;
+ }
+ ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
+ }
+
+ ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
+ // Get the SourceLocation that represents the start location within
+ // the macro definition that marks where this string is substituted
+ // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
+ // macro definition, and use it to indicate that the stringified token
+ // was generated from that location.
+ const SourceLocation ExpansionLocStartWithinMacro =
+ getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
+ const SourceLocation ExpansionLocEndWithinMacro =
+ getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
+
+ Token StringifiedVAOPT = MacroArgs::StringifyArgument(
+ &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
+ ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
+
+ if (VCtx.getLeadingSpaceForStringifiedToken())
+ StringifiedVAOPT.setFlag(Token::LeadingSpace);
+
+ StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
+ // Resize (shrink) the token stream to just capture this stringified token.
+ ResultToks.resize(NumToksPriorToVAOpt + 1);
+ ResultToks.back() = StringifiedVAOPT;
+}
+
+/// Expand the arguments of a function-like macro so that we can quickly
+/// return preexpanded tokens from Tokens.
+void TokenLexer::ExpandFunctionArguments() {
+ SmallVector<Token, 128> ResultToks;
+
+ // Loop through 'Tokens', expanding them into ResultToks. Keep
+ // track of whether we change anything. If not, no need to keep them. If so,
+ // we install the newly expanded sequence as the new 'Tokens' list.
+ bool MadeChange = false;
+
+ Optional<bool> CalledWithVariadicArguments;
+
+ VAOptExpansionContext VCtx(PP);
+
+ for (unsigned I = 0, E = NumTokens; I != E; ++I) {
+ const Token &CurTok = Tokens[I];
+ // We don't want a space for the next token after a paste
+ // operator. In valid code, the token will get smooshed onto the
+ // preceding one anyway. In assembler-with-cpp mode, invalid
+ // pastes are allowed through: in this case, we do not want the
+ // extra whitespace to be added. For example, we want ". ## foo"
+ // -> ".foo" not ". foo".
+ if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
+ NextTokGetsSpace = true;
+
+ if (VCtx.isVAOptToken(CurTok)) {
+ MadeChange = true;
+ assert(Tokens[I + 1].is(tok::l_paren) &&
+ "__VA_OPT__ must be followed by '('");
+
+ ++I; // Skip the l_paren
+ VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
+ ResultToks.size());
+
+ continue;
+ }
+
+ // We have entered into the __VA_OPT__ context, so handle tokens
+ // appropriately.
+ if (VCtx.isInVAOpt()) {
+ // If we are about to process a token that is either an argument to
+ // __VA_OPT__ or its closing rparen, then:
+ // 1) If the token is the closing rparen that exits us out of __VA_OPT__,
+ // perform any necessary stringification or placemarker processing,
+ // and/or skip to the next token.
+ // 2) else if macro was invoked without variadic arguments skip this
+ // token.
+ // 3) else (macro was invoked with variadic arguments) process the token
+ // normally.
+
+ if (Tokens[I].is(tok::l_paren))
+ VCtx.sawOpeningParen(Tokens[I].getLocation());
+ // Continue skipping tokens within __VA_OPT__ if the macro was not
+ // called with variadic arguments, else let the rest of the loop handle
+ // this token. Note sawClosingParen() returns true only if the r_paren matches
+ // the closing r_paren of the __VA_OPT__.
+ if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
+ // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
+ if (!CalledWithVariadicArguments.hasValue()) {
+ CalledWithVariadicArguments =
+ ActualArgs->invokedWithVariadicArgument(Macro, PP);
+ }
+ if (!*CalledWithVariadicArguments) {
+ // Skip this token.
+ continue;
+ }
+ // ... else the macro was called with variadic arguments, and we do not
+ // have a closing rparen - so process this token normally.
+ } else {
+ // Current token is the closing r_paren which marks the end of the
+ // __VA_OPT__ invocation, so handle any place-marker pasting (if
+ // empty) by removing hashhash either before (if exists) or after. And
+ // also stringify the entire contents if VAOPT was preceded by a hash,
+ // but do so only after any token concatenation that needs to occur
+ // within the contents of VAOPT.
+
+ if (VCtx.hasStringifyOrCharifyBefore()) {
+ // Replace all the tokens just added from within VAOPT into a single
+ // stringified token. This requires token-pasting to eagerly occur
+ // within these tokens. If either the contents of VAOPT were empty
+ // or the macro wasn't called with any variadic arguments, the result
+ // is a token that represents an empty string.
+ stringifyVAOPTContents(ResultToks, VCtx,
+ /*ClosingParenLoc*/ Tokens[I].getLocation());
+
+ } else if (/*No tokens within VAOPT*/
+ ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
+ // Treat VAOPT as a placemarker token. Eat either the '##' before the
+ // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
+ // hashhash was not a placemarker) or the '##'
+ // after VAOPT, but not both.
+
+ if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
+ ResultToks.pop_back();
+ } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
+ ++I; // Skip the following hashhash.
+ }
+ } else {
+ // If there's a ## before the __VA_OPT__, we might have discovered
+ // that the __VA_OPT__ begins with a placeholder. We delay action on
+ // that to now to avoid messing up our stashed count of tokens before
+ // __VA_OPT__.
+ if (VCtx.beginsWithPlaceholder()) {
+ assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 &&
+ ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
+ ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is(
+ tok::hashhash) &&
+ "no token paste before __VA_OPT__");
+ ResultToks.erase(ResultToks.begin() +
+ VCtx.getNumberOfTokensPriorToVAOpt() - 1);
+ }
+ // If the expansion of __VA_OPT__ ends with a placeholder, eat any
+ // following '##' token.
+ if (VCtx.endsWithPlaceholder() && I + 1 != E &&
+ Tokens[I + 1].is(tok::hashhash)) {
+ ++I;
+ }
+ }
+ VCtx.reset();
+ // We processed __VA_OPT__'s closing paren (and the exit out of
+ // __VA_OPT__), so skip to the next token.
+ continue;
+ }
+ }
+
+ // If we found the stringify operator, get the argument stringified. The
+ // preprocessor already verified that the following token is a macro
+ // parameter or __VA_OPT__ when the #define was lexed.
+
+ if (CurTok.isOneOf(tok::hash, tok::hashat)) {
+ int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
+ assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
+ "Token following # is not an argument or __VA_OPT__!");
+
+ if (ArgNo == -1) {
+ // Handle the __VA_OPT__ case.
+ VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
+ CurTok.is(tok::hashat));
+ continue;
+ }
+ // Else handle the simple argument case.
+ SourceLocation ExpansionLocStart =
+ getExpansionLocForMacroDefLoc(CurTok.getLocation());
+ SourceLocation ExpansionLocEnd =
+ getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
+
+ bool Charify = CurTok.is(tok::hashat);
+ const Token *UnexpArg = ActualArgs->getUnexpArgument(ArgNo);
+ Token Res = MacroArgs::StringifyArgument(
+ UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
+ Res.setFlag(Token::StringifiedInMacro);
+
+ // The stringified/charified string leading space flag gets set to match
+ // the #/#@ operator.
+ if (NextTokGetsSpace)
+ Res.setFlag(Token::LeadingSpace);
+
+ ResultToks.push_back(Res);
+ MadeChange = true;
+ ++I; // Skip arg name.
+ NextTokGetsSpace = false;
+ continue;
+ }
+
+ // Find out if there is a paste (##) operator before or after the token.
+ bool NonEmptyPasteBefore =
+ !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
+ bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
+ bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
+ bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren);
+
+ assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
+ "unexpected ## in ResultToks");
+
+ // Otherwise, if this is not an argument token, just add the token to the
+ // output buffer.
+ IdentifierInfo *II = CurTok.getIdentifierInfo();
+ int ArgNo = II ? Macro->getParameterNum(II) : -1;
+ if (ArgNo == -1) {
+ // This isn't an argument, just add it.
+ ResultToks.push_back(CurTok);
+
+ if (NextTokGetsSpace) {
+ ResultToks.back().setFlag(Token::LeadingSpace);
+ NextTokGetsSpace = false;
+ } else if (PasteBefore && !NonEmptyPasteBefore)
+ ResultToks.back().clearFlag(Token::LeadingSpace);
+
+ continue;
+ }
+
+ // An argument is expanded somehow, the result is different than the
+ // input.
+ MadeChange = true;
+
+ // Otherwise, this is a use of the argument.
+
+ // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
+ // are no trailing commas if __VA_ARGS__ is empty.
+ if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
+ MaybeRemoveCommaBeforeVaArgs(ResultToks,
+ /*HasPasteOperator=*/false,
+ Macro, ArgNo, PP))
+ continue;
+
+ // If it is not the LHS/RHS of a ## operator, we must pre-expand the
+ // argument and substitute the expanded tokens into the result. This is
+ // C99 6.10.3.1p1.
+ if (!PasteBefore && !PasteAfter) {
+ const Token *ResultArgToks;
+
+ // Only preexpand the argument if it could possibly need it. This
+ // avoids some work in common cases.
+ const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
+ if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
+ ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
+ else
+ ResultArgToks = ArgTok; // Use non-preexpanded tokens.
+
+ // If the arg token expanded into anything, append it.
+ if (ResultArgToks->isNot(tok::eof)) {
+ size_t FirstResult = ResultToks.size();
+ unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
+ ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
+
+ // In Microsoft-compatibility mode, we follow MSVC's preprocessing
+ // behavior by not considering single commas from nested macro
+ // expansions as argument separators. Set a flag on the token so we can
+ // test for this later when the macro expansion is processed.
+ if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
+ ResultToks.back().is(tok::comma))
+ ResultToks.back().setFlag(Token::IgnoredComma);
+
+ // If the '##' came from expanding an argument, turn it into 'unknown'
+ // to avoid pasting.
+ for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
+ ResultToks.end())) {
+ if (Tok.is(tok::hashhash))
+ Tok.setKind(tok::unknown);
+ }
+
+ if(ExpandLocStart.isValid()) {
+ updateLocForMacroArgTokens(CurTok.getLocation(),
+ ResultToks.begin()+FirstResult,
+ ResultToks.end());
+ }
+
+ // If any tokens were substituted from the argument, the whitespace
+ // before the first token should match the whitespace of the arg
+ // identifier.
+ ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
+ NextTokGetsSpace);
+ ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
+ NextTokGetsSpace = false;
+ } else {
+ // We're creating a placeholder token. Usually this doesn't matter,
+ // but it can affect paste behavior when at the start or end of a
+ // __VA_OPT__.
+ if (NonEmptyPasteBefore) {
+ // We're imagining a placeholder token is inserted here. If this is
+ // the first token in a __VA_OPT__ after a ##, delete the ##.
+ assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
+ VCtx.hasPlaceholderAfterHashhashAtStart();
+ }
+ if (RParenAfter)
+ VCtx.hasPlaceholderBeforeRParen();
+ }
+ continue;
+ }
+
+ // Okay, we have a token that is either the LHS or RHS of a paste (##)
+ // argument. It gets substituted as its non-pre-expanded tokens.
+ const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
+ unsigned NumToks = MacroArgs::getArgLength(ArgToks);
+ if (NumToks) { // Not an empty argument?
+ bool VaArgsPseudoPaste = false;
+ // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
+ // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
+ // the expander tries to paste ',' with the first token of the __VA_ARGS__
+ // expansion.
+ if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
+ ResultToks[ResultToks.size()-2].is(tok::comma) &&
+ (unsigned)ArgNo == Macro->getNumParams()-1 &&
+ Macro->isVariadic()) {
+ VaArgsPseudoPaste = true;
+ // Remove the paste operator, report use of the extension.
+ PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
+ }
+
+ ResultToks.append(ArgToks, ArgToks+NumToks);
+
+ // If the '##' came from expanding an argument, turn it into 'unknown'
+ // to avoid pasting.
+ for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
+ ResultToks.end())) {
+ if (Tok.is(tok::hashhash))
+ Tok.setKind(tok::unknown);
+ }
+
+ if (ExpandLocStart.isValid()) {
+ updateLocForMacroArgTokens(CurTok.getLocation(),
+ ResultToks.end()-NumToks, ResultToks.end());
+ }
+
+ // Transfer the leading whitespace information from the token
+ // (the macro argument) onto the first token of the
+ // expansion. Note that we don't do this for the GNU
+ // pseudo-paste extension ", ## __VA_ARGS__".
+ if (!VaArgsPseudoPaste) {
+ ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
+ false);
+ ResultToks[ResultToks.size() - NumToks].setFlagValue(
+ Token::LeadingSpace, NextTokGetsSpace);
+ }
+
+ NextTokGetsSpace = false;
+ continue;
+ }
+
+ // If an empty argument is on the LHS or RHS of a paste, the standard (C99
+ // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
+ // implement this by eating ## operators when a LHS or RHS expands to
+ // empty.
+ if (PasteAfter) {
+ // Discard the argument token and skip (don't copy to the expansion
+ // buffer) the paste operator after it.
+ ++I;
+ continue;
+ }
+
+ if (RParenAfter)
+ VCtx.hasPlaceholderBeforeRParen();
+
+ // If this is on the RHS of a paste operator, we've already copied the
+ // paste operator to the ResultToks list, unless the LHS was empty too.
+ // Remove it.
+ assert(PasteBefore);
+ if (NonEmptyPasteBefore) {
+ assert(ResultToks.back().is(tok::hashhash));
+ // Do not remove the paste operator if it is the one before __VA_OPT__
+ // (and we are still processing tokens within VA_OPT). We handle the case
+ // of removing the paste operator if __VA_OPT__ reduces to the notional
+ // placemarker above when we encounter the closing paren of VA_OPT.
+ if (!VCtx.isInVAOpt() ||
+ ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
+ ResultToks.pop_back();
+ else
+ VCtx.hasPlaceholderAfterHashhashAtStart();
+ }
+
+ // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
+ // and if the macro had at least one real argument, and if the token before
+ // the ## was a comma, remove the comma. This is a GCC extension which is
+ // disabled when using -std=c99.
+ if (ActualArgs->isVarargsElidedUse())
+ MaybeRemoveCommaBeforeVaArgs(ResultToks,
+ /*HasPasteOperator=*/true,
+ Macro, ArgNo, PP);
+ }
+
+ // If anything changed, install this as the new Tokens list.
+ if (MadeChange) {
+ assert(!OwnsTokens && "This would leak if we already own the token list");
+ // This is deleted in the dtor.
+ NumTokens = ResultToks.size();
+ // The tokens will be added to Preprocessor's cache and will be removed
+ // when this TokenLexer finishes lexing them.
+ Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
+
+ // The preprocessor cache of macro expanded tokens owns these tokens,not us.
+ OwnsTokens = false;
+ }
+}
+
+/// Checks if two tokens form wide string literal.
+static bool isWideStringLiteralFromMacro(const Token &FirstTok,
+ const Token &SecondTok) {
+ return FirstTok.is(tok::identifier) &&
+ FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
+ SecondTok.stringifiedInMacro();
+}
+
+/// Lex - Lex and return a token from this macro stream.
+bool TokenLexer::Lex(Token &Tok) {
+ // Lexing off the end of the macro, pop this macro off the expansion stack.
+ if (isAtEnd()) {
+ // If this is a macro (not a token stream), mark the macro enabled now
+ // that it is no longer being expanded.
+ if (Macro) Macro->EnableMacro();
+
+ Tok.startToken();
+ Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+ Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
+ if (CurTokenIdx == 0)
+ Tok.setFlag(Token::LeadingEmptyMacro);
+ return PP.HandleEndOfTokenLexer(Tok);
+ }
+
+ SourceManager &SM = PP.getSourceManager();
+
+ // If this is the first token of the expanded result, we inherit spacing
+ // properties later.
+ bool isFirstToken = CurTokenIdx == 0;
+
+ // Get the next token to return.
+ Tok = Tokens[CurTokenIdx++];
+ if (IsReinject)
+ Tok.setFlag(Token::IsReinjected);
+
+ bool TokenIsFromPaste = false;
+
+ // If this token is followed by a token paste (##) operator, paste the tokens!
+ // Note that ## is a normal token when not expanding a macro.
+ if (!isAtEnd() && Macro &&
+ (Tokens[CurTokenIdx].is(tok::hashhash) ||
+ // Special processing of L#x macros in -fms-compatibility mode.
+ // Microsoft compiler is able to form a wide string literal from
+ // 'L#macro_arg' construct in a function-like macro.
+ (PP.getLangOpts().MSVCCompat &&
+ isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
+ // When handling the microsoft /##/ extension, the final token is
+ // returned by pasteTokens, not the pasted token.
+ if (pasteTokens(Tok))
+ return true;
+
+ TokenIsFromPaste = true;
+ }
+
+ // The token's current location indicate where the token was lexed from. We
+ // need this information to compute the spelling of the token, but any
+ // diagnostics for the expanded token should appear as if they came from
+ // ExpansionLoc. Pull this information together into a new SourceLocation
+ // that captures all of this.
+ if (ExpandLocStart.isValid() && // Don't do this for token streams.
+ // Check that the token's location was not already set properly.
+ SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
+ SourceLocation instLoc;
+ if (Tok.is(tok::comment)) {
+ instLoc = SM.createExpansionLoc(Tok.getLocation(),
+ ExpandLocStart,
+ ExpandLocEnd,
+ Tok.getLength());
+ } else {
+ instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
+ }
+
+ Tok.setLocation(instLoc);
+ }
+
+ // If this is the first token, set the lexical properties of the token to
+ // match the lexical properties of the macro identifier.
+ if (isFirstToken) {
+ Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+ Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+ } else {
+ // If this is not the first token, we may still need to pass through
+ // leading whitespace if we've expanded a macro.
+ if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
+ if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
+ }
+ AtStartOfLine = false;
+ HasLeadingSpace = false;
+
+ // Handle recursive expansion!
+ if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
+ // Change the kind of this identifier to the appropriate token kind, e.g.
+ // turning "for" into a keyword.
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ Tok.setKind(II->getTokenID());
+
+ // If this identifier was poisoned and from a paste, emit an error. This
+ // won't be handled by Preprocessor::HandleIdentifier because this is coming
+ // from a macro expansion.
+ if (II->isPoisoned() && TokenIsFromPaste) {
+ PP.HandlePoisonedIdentifier(Tok);
+ }
+
+ if (!DisableMacroExpansion && II->isHandleIdentifierCase())
+ return PP.HandleIdentifier(Tok);
+ }
+
+ // Otherwise, return a normal token.
+ return true;
+}
+
+bool TokenLexer::pasteTokens(Token &Tok) {
+ return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
+}
+
+/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
+/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
+/// are more ## after it, chomp them iteratively. Return the result as LHSTok.
+/// If this returns true, the caller should immediately return the token.
+bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
+ unsigned int &CurIdx) {
+ assert(CurIdx > 0 && "## can not be the first token within tokens");
+ assert((TokenStream[CurIdx].is(tok::hashhash) ||
+ (PP.getLangOpts().MSVCCompat &&
+ isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
+ "Token at this Index must be ## or part of the MSVC 'L "
+ "#macro-arg' pasting pair");
+
+ // MSVC: If previous token was pasted, this must be a recovery from an invalid
+ // paste operation. Ignore spaces before this token to mimic MSVC output.
+ // Required for generating valid UUID strings in some MS headers.
+ if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
+ TokenStream[CurIdx - 2].is(tok::hashhash))
+ LHSTok.clearFlag(Token::LeadingSpace);
+
+ SmallString<128> Buffer;
+ const char *ResultTokStrPtr = nullptr;
+ SourceLocation StartLoc = LHSTok.getLocation();
+ SourceLocation PasteOpLoc;
+
+ auto IsAtEnd = [&TokenStream, &CurIdx] {
+ return TokenStream.size() == CurIdx;
+ };
+
+ do {
+ // Consume the ## operator if any.
+ PasteOpLoc = TokenStream[CurIdx].getLocation();
+ if (TokenStream[CurIdx].is(tok::hashhash))
+ ++CurIdx;
+ assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
+
+ // Get the RHS token.
+ const Token &RHS = TokenStream[CurIdx];
+
+ // Allocate space for the result token. This is guaranteed to be enough for
+ // the two tokens.
+ Buffer.resize(LHSTok.getLength() + RHS.getLength());
+
+ // Get the spelling of the LHS token in Buffer.
+ const char *BufPtr = &Buffer[0];
+ bool Invalid = false;
+ unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
+ if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
+ memcpy(&Buffer[0], BufPtr, LHSLen);
+ if (Invalid)
+ return true;
+
+ BufPtr = Buffer.data() + LHSLen;
+ unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
+ if (Invalid)
+ return true;
+ if (RHSLen && BufPtr != &Buffer[LHSLen])
+ // Really, we want the chars in Buffer!
+ memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
+
+ // Trim excess space.
+ Buffer.resize(LHSLen+RHSLen);
+
+ // Plop the pasted result (including the trailing newline and null) into a
+ // scratch buffer where we can lex it.
+ Token ResultTokTmp;
+ ResultTokTmp.startToken();
+
+ // Claim that the tmp token is a string_literal so that we can get the
+ // character pointer back from CreateString in getLiteralData().
+ ResultTokTmp.setKind(tok::string_literal);
+ PP.CreateString(Buffer, ResultTokTmp);
+ SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
+ ResultTokStrPtr = ResultTokTmp.getLiteralData();
+
+ // Lex the resultant pasted token into Result.
+ Token Result;
+
+ if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
+ // Common paste case: identifier+identifier = identifier. Avoid creating
+ // a lexer and other overhead.
+ PP.IncrementPasteCounter(true);
+ Result.startToken();
+ Result.setKind(tok::raw_identifier);
+ Result.setRawIdentifierData(ResultTokStrPtr);
+ Result.setLocation(ResultTokLoc);
+ Result.setLength(LHSLen+RHSLen);
+ } else {
+ PP.IncrementPasteCounter(false);
+
+ assert(ResultTokLoc.isFileID() &&
+ "Should be a raw location into scratch buffer");
+ SourceManager &SourceMgr = PP.getSourceManager();
+ FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
+
+ bool Invalid = false;
+ const char *ScratchBufStart
+ = SourceMgr.getBufferData(LocFileID, &Invalid).data();
+ if (Invalid)
+ return false;
+
+ // Make a lexer to lex this string from. Lex just this one token.
+ // Make a lexer object so that we lex and expand the paste result.
+ Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
+ PP.getLangOpts(), ScratchBufStart,
+ ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
+
+ // Lex a token in raw mode. This way it won't look up identifiers
+ // automatically, lexing off the end will return an eof token, and
+ // warnings are disabled. This returns true if the result token is the
+ // entire buffer.
+ bool isInvalid = !TL.LexFromRawLexer(Result);
+
+ // If we got an EOF token, we didn't form even ONE token. For example, we
+ // did "/ ## /" to get "//".
+ isInvalid |= Result.is(tok::eof);
+
+ // If pasting the two tokens didn't form a full new token, this is an
+ // error. This occurs with "x ## +" and other stuff. Return with LHSTok
+ // unmodified and with RHS as the next token to lex.
+ if (isInvalid) {
+ // Explicitly convert the token location to have proper expansion
+ // information so that the user knows where it came from.
+ SourceManager &SM = PP.getSourceManager();
+ SourceLocation Loc =
+ SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
+
+ // Test for the Microsoft extension of /##/ turning into // here on the
+ // error path.
+ if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
+ RHS.is(tok::slash)) {
+ HandleMicrosoftCommentPaste(LHSTok, Loc);
+ return true;
+ }
+
+ // Do not emit the error when preprocessing assembler code.
+ if (!PP.getLangOpts().AsmPreprocessor) {
+ // If we're in microsoft extensions mode, downgrade this from a hard
+ // error to an extension that defaults to an error. This allows
+ // disabling it.
+ PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
+ : diag::err_pp_bad_paste)
+ << Buffer;
+ }
+
+ // An error has occurred so exit loop.
+ break;
+ }
+
+ // Turn ## into 'unknown' to avoid # ## # from looking like a paste
+ // operator.
+ if (Result.is(tok::hashhash))
+ Result.setKind(tok::unknown);
+ }
+
+ // Transfer properties of the LHS over the Result.
+ Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
+ Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());
+
+ // Finally, replace LHS with the result, consume the RHS, and iterate.
+ ++CurIdx;
+ LHSTok = Result;
+ } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
+
+ SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();
+
+ // The token's current location indicate where the token was lexed from. We
+ // need this information to compute the spelling of the token, but any
+ // diagnostics for the expanded token should appear as if the token was
+ // expanded from the full ## expression. Pull this information together into
+ // a new SourceLocation that captures all of this.
+ SourceManager &SM = PP.getSourceManager();
+ if (StartLoc.isFileID())
+ StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
+ if (EndLoc.isFileID())
+ EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
+ FileID MacroFID = SM.getFileID(MacroExpansionStart);
+ while (SM.getFileID(StartLoc) != MacroFID)
+ StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
+ while (SM.getFileID(EndLoc) != MacroFID)
+ EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
+
+ LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
+ LHSTok.getLength()));
+
+ // Now that we got the result token, it will be subject to expansion. Since
+ // token pasting re-lexes the result token in raw mode, identifier information
+ // isn't looked up. As such, if the result is an identifier, look up id info.
+ if (LHSTok.is(tok::raw_identifier)) {
+ // Look up the identifier info for the token. We disabled identifier lookup
+ // by saying we're skipping contents, so we need to do this manually.
+ PP.LookUpIdentifierInfo(LHSTok);
+ }
+ return false;
+}
+
+/// isNextTokenLParen - If the next token lexed will pop this macro off the
+/// expansion stack, return 2. If the next unexpanded token is a '(', return
+/// 1, otherwise return 0.
+unsigned TokenLexer::isNextTokenLParen() const {
+ // Out of tokens?
+ if (isAtEnd())
+ return 2;
+ return Tokens[CurTokenIdx].is(tok::l_paren);
+}
+
+/// isParsingPreprocessorDirective - Return true if we are in the middle of a
+/// preprocessor directive.
+bool TokenLexer::isParsingPreprocessorDirective() const {
+ return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
+}
+
+/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
+/// together to form a comment that comments out everything in the current
+/// macro, other active macros, and anything left on the current physical
+/// source line of the expanded buffer. Handle this by returning the
+/// first token on the next line.
+void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
+ PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
+
+ // We 'comment out' the rest of this macro by just ignoring the rest of the
+ // tokens that have not been lexed yet, if any.
+
+ // Since this must be a macro, mark the macro enabled now that it is no longer
+ // being expanded.
+ assert(Macro && "Token streams can't paste comments");
+ Macro->EnableMacro();
+
+ PP.HandleMicrosoftCommentPaste(Tok);
+}
+
+/// If \arg loc is a file ID and points inside the current macro
+/// definition, returns the appropriate source location pointing at the
+/// macro expansion source location entry, otherwise it returns an invalid
+/// SourceLocation.
+SourceLocation
+TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
+ assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
+ "Not appropriate for token streams");
+ assert(loc.isValid() && loc.isFileID());
+
+ SourceManager &SM = PP.getSourceManager();
+ assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
+ "Expected loc to come from the macro definition");
+
+ unsigned relativeOffset = 0;
+ SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
+ return MacroExpansionStart.getLocWithOffset(relativeOffset);
+}
+
+/// Finds the tokens that are consecutive (from the same FileID)
+/// creates a single SLocEntry, and assigns SourceLocations to each token that
+/// point to that SLocEntry. e.g for
+/// assert(foo == bar);
+/// There will be a single SLocEntry for the "foo == bar" chunk and locations
+/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
+///
+/// \arg begin_tokens will be updated to a position past all the found
+/// consecutive tokens.
+static void updateConsecutiveMacroArgTokens(SourceManager &SM,
+ SourceLocation InstLoc,
+ Token *&begin_tokens,
+ Token * end_tokens) {
+ assert(begin_tokens < end_tokens);
+
+ SourceLocation FirstLoc = begin_tokens->getLocation();
+ SourceLocation CurLoc = FirstLoc;
+
+ // Compare the source location offset of tokens and group together tokens that
+ // are close, even if their locations point to different FileIDs. e.g.
+ //
+ // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
+ // ^ ^
+ // |bar foo cake| (one SLocEntry chunk for all tokens)
+ //
+ // we can perform this "merge" since the token's spelling location depends
+ // on the relative offset.
+
+ Token *NextTok = begin_tokens + 1;
+ for (; NextTok < end_tokens; ++NextTok) {
+ SourceLocation NextLoc = NextTok->getLocation();
+ if (CurLoc.isFileID() != NextLoc.isFileID())
+ break; // Token from different kind of FileID.
+
+ int RelOffs;
+ if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
+ break; // Token from different local/loaded location.
+ // Check that token is not before the previous token or more than 50
+ // "characters" away.
+ if (RelOffs < 0 || RelOffs > 50)
+ break;
+
+ if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
+ break; // Token from a different macro.
+
+ CurLoc = NextLoc;
+ }
+
+ // For the consecutive tokens, find the length of the SLocEntry to contain
+ // all of them.
+ Token &LastConsecutiveTok = *(NextTok-1);
+ int LastRelOffs = 0;
+ SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
+ &LastRelOffs);
+ unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
+
+ // Create a macro expansion SLocEntry that will "contain" all of the tokens.
+ SourceLocation Expansion =
+ SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
+
+ // Change the location of the tokens from the spelling location to the new
+ // expanded location.
+ for (; begin_tokens < NextTok; ++begin_tokens) {
+ Token &Tok = *begin_tokens;
+ int RelOffs = 0;
+ SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
+ Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
+ }
+}
+
+/// Creates SLocEntries and updates the locations of macro argument
+/// tokens to their new expanded locations.
+///
+/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
+/// definition.
+void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
+ Token *begin_tokens,
+ Token *end_tokens) {
+ SourceManager &SM = PP.getSourceManager();
+
+ SourceLocation InstLoc =
+ getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
+
+ while (begin_tokens < end_tokens) {
+ // If there's only one token just create a SLocEntry for it.
+ if (end_tokens - begin_tokens == 1) {
+ Token &Tok = *begin_tokens;
+ Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
+ InstLoc,
+ Tok.getLength()));
+ return;
+ }
+
+ updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
+ }
+}
+
+void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+ AtStartOfLine = Result.isAtStartOfLine();
+ HasLeadingSpace = Result.hasLeadingSpace();
+}
diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h
new file mode 100644
index 000000000000..74dd57fdf118
--- /dev/null
+++ b/clang/lib/Lex/UnicodeCharSets.h
@@ -0,0 +1,407 @@
+//===--- UnicodeCharSets.h - Contains important sets of characters --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H
+#define LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H
+
+#include "llvm/Support/UnicodeCharRanges.h"
+
+// C11 D.1, C++11 [charname.allowed]
+static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = {
+ // 1
+ { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
+ { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
+ { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+ { 0x00F8, 0x00FF },
+ // 2
+ { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF },
+ // 3
+ { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 },
+ { 0x2054, 0x2054 }, { 0x2060, 0x206F },
+ // 4
+ { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
+ { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF },
+ // 5
+ { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F },
+ // 6
+ { 0x3040, 0xD7FF },
+ // 7
+ { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
+ { 0xFE47, 0xFFFD },
+ // 8
+ { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
+ { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD },
+ { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
+ { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD },
+ { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
+};
+
+// C++03 [extendid]
+// Note that this is not the same as C++98, but we don't distinguish C++98
+// and C++03 in Clang.
+static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[] = {
+ // Latin
+ { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 },
+ { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+ // Greek
+ { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+ { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+ { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+ { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+ // Cyrillic
+ { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C },
+ { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+ { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+ { 0x04F8, 0x04F9 },
+
+ // Armenian
+ { 0x0531, 0x0556 }, { 0x0561, 0x0587 },
+
+ // Hebrew
+ { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 },
+
+ // Arabic
+ { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 },
+ { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 },
+
+ // Devanagari
+ { 0x0905, 0x0939 }, { 0x0958, 0x0962 },
+
+ // Bengali
+ { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
+ { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
+ { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
+
+ // Gurmukhi
+ { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
+ { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
+ { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
+
+ // Gujarti
+ { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 },
+ { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
+ { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 },
+
+ // Oriya
+ { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 },
+ { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 },
+ { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+ // Tamil
+ { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
+ { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
+ { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
+ { 0x0BB7, 0x0BB9 },
+
+ // Telugu
+ { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
+ { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
+
+ // Kannada
+ { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
+ { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 },
+
+ // Malayam
+ { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
+ { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
+
+ // Thai
+ { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 },
+ { 0x0E4F, 0x0E5B },
+
+ // Lao
+ { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 },
+ { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
+ { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
+ { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA },
+ { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 },
+ { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+ { 0x0EC6, 0x0EC6 },
+
+ // Georgian
+ { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+ // Hangul
+ { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 },
+
+ // Latin (2)
+ { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 },
+
+ // Greek (2)
+ { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+ { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+ { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+ { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 },
+ { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
+ { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },
+
+ // Hiragana
+ { 0x3041, 0x3094 }, { 0x309B, 0x309E },
+
+ // Katakana
+ { 0x30A1, 0x30FE },
+
+ // Bopmofo [sic]
+ { 0x3105, 0x312C },
+
+ // CJK Unified Ideographs
+ { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 },
+ { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
+ { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F },
+ { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
+ { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC },
+ { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE },
+ { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 },
+ { 0xFFDA, 0xFFDC }
+};
+
+// C99 Annex D
+static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[] = {
+ // Latin (1)
+ { 0x00AA, 0x00AA },
+
+ // Special characters (1)
+ { 0x00B5, 0x00B5 }, { 0x00B7, 0x00B7 },
+
+ // Latin (2)
+ { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+ { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
+
+ // Special characters (2)
+ { 0x02B0, 0x02B8 }, { 0x02BB, 0x02BB }, { 0x02BD, 0x02C1 },
+ { 0x02D0, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x037A, 0x037A },
+
+ // Greek (1)
+ { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+ { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
+ { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
+ { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
+
+ // Cyrillic
+ { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
+ { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
+ { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
+ { 0x04F8, 0x04F9 },
+
+ // Armenian (1)
+ { 0x0531, 0x0556 },
+
+ // Special characters (3)
+ { 0x0559, 0x0559 },
+
+ // Armenian (2)
+ { 0x0561, 0x0587 },
+
+ // Hebrew
+ { 0x05B0, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF },
+ { 0x05C1, 0x05C2 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
+
+ // Arabic (1)
+ { 0x0621, 0x063A }, { 0x0640, 0x0652 },
+
+ // Digits (1)
+ { 0x0660, 0x0669 },
+
+ // Arabic (2)
+ { 0x0670, 0x06B7 }, { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE },
+ { 0x06D0, 0x06DC }, { 0x06E5, 0x06E8 }, { 0x06EA, 0x06ED },
+
+ // Digits (2)
+ { 0x06F0, 0x06F9 },
+
+ // Devanagari and Special character 0x093D.
+ { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093D, 0x094D },
+ { 0x0950, 0x0952 }, { 0x0958, 0x0963 },
+
+ // Digits (3)
+ { 0x0966, 0x096F },
+
+ // Bengali (1)
+ { 0x0981, 0x0983 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
+ { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
+ { 0x09B6, 0x09B9 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
+ { 0x09CB, 0x09CD }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 },
+
+ // Digits (4)
+ { 0x09E6, 0x09EF },
+
+ // Bengali (2)
+ { 0x09F0, 0x09F1 },
+
+ // Gurmukhi (1)
+ { 0x0A02, 0x0A02 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
+ { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
+ { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3E, 0x0A42 },
+ { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A59, 0x0A5C },
+ { 0x0A5E, 0x0A5E },
+
+ // Digits (5)
+ { 0x0A66, 0x0A6F },
+
+ // Gurmukhi (2)
+ { 0x0A74, 0x0A74 },
+
+ // Gujarti
+ { 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
+ { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
+ { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0AC5 },
+ { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 },
+ { 0x0AE0, 0x0AE0 },
+
+ // Digits (6)
+ { 0x0AE6, 0x0AEF },
+
+ // Oriya and Special character 0x0B3D
+ { 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
+ { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
+ { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B43 }, { 0x0B47, 0x0B48 },
+ { 0x0B4B, 0x0B4D }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
+
+ // Digits (7)
+ { 0x0B66, 0x0B6F },
+
+ // Tamil
+ { 0x0B82, 0x0B83 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
+ { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
+ { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
+ { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 },
+ { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
+
+ // Digits (8)
+ { 0x0BE7, 0x0BEF },
+
+ // Telugu
+ { 0x0C01, 0x0C03 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 },
+ { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 },
+ { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
+ { 0x0C60, 0x0C61 },
+
+ // Digits (9)
+ { 0x0C66, 0x0C6F },
+
+ // Kannada
+ { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 },
+ { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 },
+ { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
+ { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },
+
+ // Digits (10)
+ { 0x0CE6, 0x0CEF },
+
+ // Malayam
+ { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
+ { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D3E, 0x0D43 },
+ { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D60, 0x0D61 },
+
+ // Digits (11)
+ { 0x0D66, 0x0D6F },
+
+ // Thai...including Digits { 0x0E50, 0x0E59 }
+ { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E5B },
+
+ // Lao (1)
+ { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
+ { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
+ { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
+ { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
+ { 0x0EB0, 0x0EB9 }, { 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 },
+ { 0x0EC6, 0x0EC6 }, { 0x0EC8, 0x0ECD },
+
+ // Digits (12)
+ { 0x0ED0, 0x0ED9 },
+
+ // Lao (2)
+ { 0x0EDC, 0x0EDD },
+
+ // Tibetan (1)
+ { 0x0F00, 0x0F00 }, { 0x0F18, 0x0F19 },
+
+ // Digits (13)
+ { 0x0F20, 0x0F33 },
+
+ // Tibetan (2)
+ { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
+ { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x0F71, 0x0F84 },
+ { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 },
+ { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 },
+
+ // Georgian
+ { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
+
+ // Latin (3)
+ { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },
+
+ // Greek (2)
+ { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
+ { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
+ { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
+ { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },
+
+ // Special characters (4)
+ { 0x1FBE, 0x1FBE },
+
+ // Greek (3)
+ { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
+ { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
+ { 0x1FF6, 0x1FFC },
+
+ // Special characters (5)
+ { 0x203F, 0x2040 },
+
+ // Latin (4)
+ { 0x207F, 0x207F },
+
+ // Special characters (6)
+ { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 },
+ { 0x2115, 0x2115 }, { 0x2118, 0x211D }, { 0x2124, 0x2124 },
+ { 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x2131 },
+ { 0x2133, 0x2138 }, { 0x2160, 0x2182 }, { 0x3005, 0x3007 },
+ { 0x3021, 0x3029 },
+
+ // Hiragana
+ { 0x3041, 0x3093 }, { 0x309B, 0x309C },
+
+ // Katakana
+ { 0x30A1, 0x30F6 }, { 0x30FB, 0x30FC },
+
+ // Bopmofo [sic]
+ { 0x3105, 0x312C },
+
+ // CJK Unified Ideographs
+ { 0x4E00, 0x9FA5 },
+
+ // Hangul,
+ { 0xAC00, 0xD7A3 }
+};
+
+// C11 D.2, C++11 [charname.disallowed]
+static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[] = {
+ { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF },
+ { 0xFE20, 0xFE2F }
+};
+
+// C99 6.4.2.1p3: The initial character [of an identifier] shall not be a
+// universal character name designating a digit.
+// C99 Annex D defines these characters as "Digits".
+static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[] = {
+ { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F },
+ { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF },
+ { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F },
+ { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0E50, 0x0E59 },
+ { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F33 }
+};
+
+// Unicode v6.2, chapter 6.2, table 6-2.
+static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[] = {
+ { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 },
+ { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 },
+ { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 }
+};
+
+#endif