diff options
Diffstat (limited to 'clang/lib/Lex')
23 files changed, 25116 insertions, 0 deletions
diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp new file mode 100644 index 000000000000..f063ed711c44 --- /dev/null +++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -0,0 +1,955 @@ +//===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace { + +struct Minimizer { +  /// Minimized output. +  SmallVectorImpl<char> &Out; +  /// The known tokens encountered during the minimization. +  SmallVectorImpl<Token> &Tokens; + +  Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens, +            StringRef Input, DiagnosticsEngine *Diags, +            SourceLocation InputSourceLoc) +      : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), +        InputSourceLoc(InputSourceLoc) {} + +  /// Lex the provided source and emit the minimized output. +  /// +  /// \returns True on error. 
+  bool minimize(); + +private: +  struct IdInfo { +    const char *Last; +    StringRef Name; +  }; + +  /// Lex an identifier. +  /// +  /// \pre First points at a valid identifier head. +  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); +  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, +                                       const char *const End); +  LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); +  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); +  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, +                                 const char *&First, const char *const End); +  Token &makeToken(TokenKind K) { +    Tokens.emplace_back(K, Out.size()); +    return Tokens.back(); +  } +  void popToken() { +    Out.resize(Tokens.back().Offset); +    Tokens.pop_back(); +  } +  TokenKind top() const { return Tokens.empty() ? 
pp_none : Tokens.back().K; } + +  Minimizer &put(char Byte) { +    Out.push_back(Byte); +    return *this; +  } +  Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } +  Minimizer &append(const char *First, const char *Last) { +    Out.append(First, Last); +    return *this; +  } + +  void printToNewline(const char *&First, const char *const End); +  void printAdjacentModuleNameParts(const char *&First, const char *const End); +  LLVM_NODISCARD bool printAtImportBody(const char *&First, +                                        const char *const End); +  void printDirectiveBody(const char *&First, const char *const End); +  void printAdjacentMacroArgs(const char *&First, const char *const End); +  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + +  /// Reports a diagnostic if the diagnostic engine is provided. Always returns +  /// true at the end. +  bool reportError(const char *CurPtr, unsigned Err); + +  StringMap<char> SplitIds; +  StringRef Input; +  DiagnosticsEngine *Diags; +  SourceLocation InputSourceLoc; +}; + +} // end anonymous namespace + +bool Minimizer::reportError(const char *CurPtr, unsigned Err) { +  if (!Diags) +    return true; +  assert(CurPtr >= Input.data() && "invalid buffer ptr"); +  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); +  return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { +  while (First != End && isHorizontalWhitespace(*First)) +    ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, +                                              const char *Current) { +  assert(First <= Current); + +  // Check if we can even back up. +  if (*Current != '"' || First == Current) +    return false; + +  // Check for an "R". +  --Current; +  if (*Current != 'R') +    return false; +  if (First == Current || !isIdentifierBody(*--Current)) +    return true; + +  // Check for a prefix of "u", "U", or "L". 
+  if (*Current == 'u' || *Current == 'U' || *Current == 'L') +    return First == Current || !isIdentifierBody(*--Current); + +  // Check for a prefix of "u8". +  if (*Current != '8' || First == Current || *Current-- != 'u') +    return false; +  return First == Current || !isIdentifierBody(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { +  assert(First[0] == '"'); +  assert(First[-1] == 'R'); + +  const char *Last = ++First; +  while (Last != End && *Last != '(') +    ++Last; +  if (Last == End) { +    First = Last; // Hit the end... just give up. +    return; +  } + +  StringRef Terminator(First, Last - First); +  for (;;) { +    // Move First to just past the next ")". +    First = Last; +    while (First != End && *First != ')') +      ++First; +    if (First == End) +      return; +    ++First; + +    // Look ahead for the terminator sequence. +    Last = First; +    while (Last != End && size_t(Last - First) < Terminator.size() && +           Terminator[Last - First] == *Last) +      ++Last; + +    // Check if we hit it (or the end of the file). +    if (Last == End) { +      First = Last; +      return; +    } +    if (size_t(Last - First) < Terminator.size()) +      continue; +    if (*Last != '"') +      continue; +    First = Last + 1; +    return; +  } +} + +// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) +static unsigned isEOL(const char *First, const char *const End) { +  if (First == End) +    return 0; +  if (End - First > 1 && isVerticalWhitespace(First[0]) && +      isVerticalWhitespace(First[1]) && First[0] != First[1]) +    return 2; +  return !!isVerticalWhitespace(First[0]); +} + +static void skipString(const char *&First, const char *const End) { +  assert(*First == '\'' || *First == '"' || *First == '<'); +  const char Terminator = *First == '<' ? 
'>' : *First; +  for (++First; First != End && *First != Terminator; ++First) { +    // String and character literals don't extend past the end of the line. +    if (isVerticalWhitespace(*First)) +      return; +    if (*First != '\\') +      continue; +    // Skip past backslash to the next character. This ensures that the +    // character right after it is skipped as well, which matters if it's +    // the terminator. +    if (++First == End) +      return; +    if (!isWhitespace(*First)) +      continue; +    // Whitespace after the backslash might indicate a line continuation. +    const char *FirstAfterBackslashPastSpace = First; +    skipOverSpaces(FirstAfterBackslashPastSpace, End); +    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { +      // Advance the character pointer to the next line for the next +      // iteration. +      First = FirstAfterBackslashPastSpace + NLSize - 1; +    } +  } +  if (First != End) +    ++First; // Finish off the string. +} + +// Returns the length of the skipped newline +static unsigned skipNewline(const char *&First, const char *End) { +  if (First == End) +    return 0; +  assert(isVerticalWhitespace(*First)); +  unsigned Len = isEOL(First, End); +  assert(Len && "expected newline"); +  First += Len; +  return Len; +} + +static bool wasLineContinuation(const char *First, unsigned EOLLen) { +  return *(First - (int)EOLLen - 1) == '\\'; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { +  for (;;) { +    if (First == End) +      return; + +    unsigned Len = isEOL(First, End); +    if (Len) +      return; + +    do { +      if (++First == End) +        return; +      Len = isEOL(First, End); +    } while (!Len); + +    if (First[-1] != '\\') +      return; + +    First += Len; +    // Keep skipping lines... 
+  } +} + +static const char *findLastNonSpace(const char *First, const char *Last) { +  assert(First <= Last); +  while (First != Last && isHorizontalWhitespace(Last[-1])) +    --Last; +  return Last; +} + +static const char *findFirstTrailingSpace(const char *First, +                                          const char *Last) { +  const char *LastNonSpace = findLastNonSpace(First, Last); +  if (Last == LastNonSpace) +    return Last; +  assert(isHorizontalWhitespace(LastNonSpace[0])); +  return LastNonSpace + 1; +} + +static void skipLineComment(const char *&First, const char *const End) { +  assert(First[0] == '/' && First[1] == '/'); +  First += 2; +  skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { +  assert(First[0] == '/' && First[1] == '*'); +  if (End - First < 4) { +    First = End; +    return; +  } +  for (First += 3; First != End; ++First) +    if (First[-1] == '*' && First[0] == '/') { +      ++First; +      return; +    } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, +                                     const char *const Cur, +                                     const char *const End) { +  assert(*Cur == '\'' && "expected quotation character"); +  // skipLine called in places where we don't expect a valid number +  // body before `start` on the same line, so always return false at the start. +  if (Start == Cur) +    return false; +  // The previous character must be a valid PP number character. +  // Make sure that the L, u, U, u8 prefixes don't get marked as a +  // separator though. 
+  char Prev = *(Cur - 1); +  if (Prev == 'L' || Prev == 'U' || Prev == 'u') +    return false; +  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +    return false; +  if (!isPreprocessingNumberBody(Prev)) +    return false; +  // The next character should be a valid identifier body character. +  return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { +  for (;;) { +    assert(First <= End); +    if (First == End) +      return; + +    if (isVerticalWhitespace(*First)) { +      skipNewline(First, End); +      return; +    } +    const char *Start = First; +    while (First != End && !isVerticalWhitespace(*First)) { +      // Iterate over strings correctly to avoid comments and newlines. +      if (*First == '"' || +          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { +        if (isRawStringLiteral(Start, First)) +          skipRawString(First, End); +        else +          skipString(First, End); +        continue; +      } + +      // Iterate over comments correctly. +      if (*First != '/' || End - First < 2) { +        ++First; +        continue; +      } + +      if (First[1] == '/') { +        // "//...". +        skipLineComment(First, End); +        continue; +      } + +      if (First[1] != '*') { +        ++First; +        continue; +      } + +      // "/*...*/". +      skipBlockComment(First, End); +    } +    if (First == End) +      return; + +    // Skip over the newline. +    unsigned Len = skipNewline(First, End); +    if (!wasLineContinuation(First, Len)) // Continue past line-continuations. +      break; +  } +} + +static void skipDirective(StringRef Name, const char *&First, +                          const char *const End) { +  if (llvm::StringSwitch<bool>(Name) +          .Case("warning", true) +          .Case("error", true) +          .Default(false)) +    // Do not process quotes or comments. 
+    skipToNewlineRaw(First, End); +  else +    skipLine(First, End); +} + +void Minimizer::printToNewline(const char *&First, const char *const End) { +  while (First != End && !isVerticalWhitespace(*First)) { +    const char *Last = First; +    do { +      // Iterate over strings correctly to avoid comments and newlines. +      if (*Last == '"' || *Last == '\'' || +          (*Last == '<' && top() == pp_include)) { +        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) +          skipRawString(Last, End); +        else +          skipString(Last, End); +        continue; +      } +      if (*Last != '/' || End - Last < 2) { +        ++Last; +        continue; // Gather the rest up to print verbatim. +      } + +      if (Last[1] != '/' && Last[1] != '*') { +        ++Last; +        continue; +      } + +      // Deal with "//..." and "/*...*/". +      append(First, findFirstTrailingSpace(First, Last)); +      First = Last; + +      if (Last[1] == '/') { +        skipLineComment(First, End); +        return; +      } + +      put(' '); +      skipBlockComment(First, End); +      skipOverSpaces(First, End); +      Last = First; +    } while (Last != End && !isVerticalWhitespace(*Last)); + +    // Print out the string. +    const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); +    if (Last == End || LastBeforeTrailingSpace == First || +        LastBeforeTrailingSpace[-1] != '\\') { +      append(First, LastBeforeTrailingSpace); +      First = Last; +      skipNewline(First, End); +      return; +    } + +    // Print up to the backslash, backing up over spaces. Preserve at least one +    // space, as the space matters when tokens are separated by a line +    // continuation. 
+    append(First, findFirstTrailingSpace( +                      First, LastBeforeTrailingSpace - 1)); + +    First = Last; +    skipNewline(First, End); +    skipOverSpaces(First, End); +  } +} + +static void skipWhitespace(const char *&First, const char *const End) { +  for (;;) { +    assert(First <= End); +    skipOverSpaces(First, End); + +    if (End - First < 2) +      return; + +    if (First[0] == '\\' && isVerticalWhitespace(First[1])) { +      skipNewline(++First, End); +      continue; +    } + +    // Check for a non-comment character. +    if (First[0] != '/') +      return; + +    // "// ...". +    if (First[1] == '/') { +      skipLineComment(First, End); +      return; +    } + +    // Cannot be a comment. +    if (First[1] != '*') +      return; + +    // "/*...*/". +    skipBlockComment(First, End); +  } +} + +void Minimizer::printAdjacentModuleNameParts(const char *&First, +                                             const char *const End) { +  // Skip over parts of the body. +  const char *Last = First; +  do +    ++Last; +  while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); +  append(First, Last); +  First = Last; +} + +bool Minimizer::printAtImportBody(const char *&First, const char *const End) { +  for (;;) { +    skipWhitespace(First, End); +    if (First == End) +      return true; + +    if (isVerticalWhitespace(*First)) { +      skipNewline(First, End); +      continue; +    } + +    // Found a semicolon. +    if (*First == ';') { +      put(*First++).put('\n'); +      return false; +    } + +    // Don't handle macro expansions inside @import for now. +    if (!isIdentifierBody(*First) && *First != '.') +      return true; + +    printAdjacentModuleNameParts(First, End); +  } +} + +void Minimizer::printDirectiveBody(const char *&First, const char *const End) { +  skipWhitespace(First, End); // Skip initial whitespace. 
+  printToNewline(First, End); +  while (Out.back() == ' ') +    Out.pop_back(); +  put('\n'); +} + +LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, +                                                   const char *const End) { +  assert(isIdentifierBody(*First) && "invalid identifer"); +  const char *Last = First + 1; +  while (Last != End && isIdentifierBody(*Last)) +    ++Last; +  return Last; +} + +LLVM_NODISCARD static const char * +getIdentifierContinuation(const char *First, const char *const End) { +  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) +    return nullptr; + +  ++First; +  skipNewline(First, End); +  if (First == End) +    return nullptr; +  return isIdentifierBody(First[0]) ? First : nullptr; +} + +Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, +                                           const char *const End) { +  const char *Last = lexRawIdentifier(First, End); +  const char *Next = getIdentifierContinuation(Last, End); +  if (LLVM_LIKELY(!Next)) +    return IdInfo{Last, StringRef(First, Last - First)}; + +  // Slow path, where identifiers are split over lines. +  SmallVector<char, 64> Id(First, Last); +  while (Next) { +    Last = lexRawIdentifier(Next, End); +    Id.append(Next, Last); +    Next = getIdentifierContinuation(Last, End); +  } +  return IdInfo{ +      Last, +      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; +} + +void Minimizer::printAdjacentMacroArgs(const char *&First, +                                       const char *const End) { +  // Skip over parts of the body. +  const char *Last = First; +  do +    ++Last; +  while (Last != End && +         (isIdentifierBody(*Last) || *Last == '.' 
|| *Last == ',')); +  append(First, Last); +  First = Last; +} + +bool Minimizer::printMacroArgs(const char *&First, const char *const End) { +  assert(*First == '('); +  put(*First++); +  for (;;) { +    skipWhitespace(First, End); +    if (First == End) +      return true; + +    if (*First == ')') { +      put(*First++); +      return false; +    } + +    // This is intentionally fairly liberal. +    if (!(isIdentifierBody(*First) || *First == '.' || *First == ',')) +      return true; + +    printAdjacentMacroArgs(First, End); +  } +} + +/// Looks for an identifier starting from Last. +/// +/// Updates "First" to just past the next identifier, if any.  Returns true iff +/// the identifier matches "Id". +bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, +                                 const char *const End) { +  skipWhitespace(First, End); +  if (First == End || !isIdentifierHead(*First)) +    return false; + +  IdInfo FoundId = lexIdentifier(First, End); +  First = FoundId.Last; +  return FoundId.Name == Id; +} + +bool Minimizer::lexAt(const char *&First, const char *const End) { +  // Handle "@import". 
+  const char *ImportLoc = First++; +  if (!isNextIdentifier("import", First, End)) { +    skipLine(First, End); +    return false; +  } +  makeToken(decl_at_import); +  append("@import "); +  if (printAtImportBody(First, End)) +    return reportError( +        ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); +  skipWhitespace(First, End); +  if (First == End) +    return false; +  if (!isVerticalWhitespace(*First)) +    return reportError( +        ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); +  skipNewline(First, End); +  return false; +} + +bool Minimizer::lexModule(const char *&First, const char *const End) { +  IdInfo Id = lexIdentifier(First, End); +  First = Id.Last; +  bool Export = false; +  if (Id.Name == "export") { +    Export = true; +    skipWhitespace(First, End); +    if (!isIdentifierBody(*First)) { +      skipLine(First, End); +      return false; +    } +    Id = lexIdentifier(First, End); +    First = Id.Last; +  } + +  if (Id.Name != "module" && Id.Name != "import") { +    skipLine(First, End); +    return false; +  } + +  skipWhitespace(First, End); + +  // Ignore this as a module directive if the next character can't be part of +  // an import. 
+ +  switch (*First) { +  case ':': +  case '<': +  case '"': +    break; +  default: +    if (!isIdentifierBody(*First)) { +      skipLine(First, End); +      return false; +    } +  } + +  if (Export) { +    makeToken(cxx_export_decl); +    append("export "); +  } + +  if (Id.Name == "module") +    makeToken(cxx_module_decl); +  else +    makeToken(cxx_import_decl); +  append(Id.Name); +  append(" "); +  printToNewline(First, End); +  append("\n"); +  return false; +} + +bool Minimizer::lexDefine(const char *&First, const char *const End) { +  makeToken(pp_define); +  append("#define "); +  skipWhitespace(First, End); + +  if (!isIdentifierHead(*First)) +    return reportError(First, diag::err_pp_macro_not_identifier); + +  IdInfo Id = lexIdentifier(First, End); +  const char *Last = Id.Last; +  append(Id.Name); +  if (Last == End) +    return false; +  if (*Last == '(') { +    size_t Size = Out.size(); +    if (printMacroArgs(Last, End)) { +      // Be robust to bad macro arguments, since they can show up in disabled +      // code. +      Out.resize(Size); +      append("(/* invalid */\n"); +      skipLine(Last, End); +      return false; +    } +  } +  skipWhitespace(Last, End); +  if (Last == End) +    return false; +  if (!isVerticalWhitespace(*Last)) +    put(' '); +  printDirectiveBody(Last, End); +  First = Last; +  return false; +} + +bool Minimizer::lexPragma(const char *&First, const char *const End) { +  // #pragma. +  skipWhitespace(First, End); +  if (First == End || !isIdentifierHead(*First)) +    return false; + +  IdInfo FoundId = lexIdentifier(First, End); +  First = FoundId.Last; +  if (FoundId.Name == "once") { +    // #pragma once +    skipLine(First, End); +    makeToken(pp_pragma_once); +    append("#pragma once\n"); +    return false; +  } + +  if (FoundId.Name != "clang") { +    skipLine(First, End); +    return false; +  } + +  // #pragma clang. 
+  if (!isNextIdentifier("module", First, End)) { +    skipLine(First, End); +    return false; +  } + +  // #pragma clang module. +  if (!isNextIdentifier("import", First, End)) { +    skipLine(First, End); +    return false; +  } + +  // #pragma clang module import. +  makeToken(pp_pragma_import); +  append("#pragma clang module import "); +  printDirectiveBody(First, End); +  return false; +} + +bool Minimizer::lexEndif(const char *&First, const char *const End) { +  // Strip out "#else" if it's empty. +  if (top() == pp_else) +    popToken(); + +  // Strip out "#elif" if they're empty. +  while (top() == pp_elif) +    popToken(); + +  // If "#if" is empty, strip it and skip the "#endif". +  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) { +    popToken(); +    skipLine(First, End); +    return false; +  } + +  return lexDefault(pp_endif, "endif", First, End); +} + +bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, +                           const char *&First, const char *const End) { +  makeToken(Kind); +  put('#').append(Directive).put(' '); +  printDirectiveBody(First, End); +  return false; +} + +static bool isStartOfRelevantLine(char First) { +  switch (First) { +  case '#': +  case '@': +  case 'i': +  case 'e': +  case 'm': +    return true; +  } +  return false; +} + +bool Minimizer::lexPPLine(const char *&First, const char *const End) { +  assert(First != End); + +  skipWhitespace(First, End); +  assert(First <= End); +  if (First == End) +    return false; + +  if (!isStartOfRelevantLine(*First)) { +    skipLine(First, End); +    assert(First <= End); +    return false; +  } + +  // Handle "@import". +  if (*First == '@') +    return lexAt(First, End); + +  if (*First == 'i' || *First == 'e' || *First == 'm') +    return lexModule(First, End); + +  // Handle preprocessing directives. +  ++First; // Skip over '#'. 
+  skipWhitespace(First, End); + +  if (First == End) +    return reportError(First, diag::err_pp_expected_eol); + +  if (!isIdentifierHead(*First)) { +    skipLine(First, End); +    return false; +  } + +  // Figure out the token. +  IdInfo Id = lexIdentifier(First, End); +  First = Id.Last; +  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name) +                  .Case("include", pp_include) +                  .Case("__include_macros", pp___include_macros) +                  .Case("define", pp_define) +                  .Case("undef", pp_undef) +                  .Case("import", pp_import) +                  .Case("include_next", pp_include_next) +                  .Case("if", pp_if) +                  .Case("ifdef", pp_ifdef) +                  .Case("ifndef", pp_ifndef) +                  .Case("elif", pp_elif) +                  .Case("else", pp_else) +                  .Case("endif", pp_endif) +                  .Case("pragma", pp_pragma_import) +                  .Default(pp_none); +  if (Kind == pp_none) { +    skipDirective(Id.Name, First, End); +    return false; +  } + +  if (Kind == pp_endif) +    return lexEndif(First, End); + +  if (Kind == pp_define) +    return lexDefine(First, End); + +  if (Kind == pp_pragma_import) +    return lexPragma(First, End); + +  // Everything else. +  return lexDefault(Kind, Id.Name, First, End); +} + +static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { +  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && +      First[2] == '\xbf') +    First += 3; +} + +bool Minimizer::minimizeImpl(const char *First, const char *const End) { +  skipUTF8ByteOrderMark(First, End); +  while (First != End) +    if (lexPPLine(First, End)) +      return true; +  return false; +} + +bool Minimizer::minimize() { +  bool Error = minimizeImpl(Input.begin(), Input.end()); + +  if (!Error) { +    // Add a trailing newline and an EOF on success. 
+    if (!Out.empty() && Out.back() != '\n') +      Out.push_back('\n'); +    makeToken(pp_eof); +  } + +  // Null-terminate the output. This way the memory buffer that's passed to +  // Clang will not have to worry about the terminating '\0'. +  Out.push_back(0); +  Out.pop_back(); +  return Error; +} + +bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( +    ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { +  struct Directive { +    enum DirectiveKind { +      If,  // if/ifdef/ifndef +      Else // elif,else +    }; +    int Offset; +    DirectiveKind Kind; +  }; +  llvm::SmallVector<Directive, 32> Offsets; +  for (const Token &T : Input) { +    switch (T.K) { +    case pp_if: +    case pp_ifdef: +    case pp_ifndef: +      Offsets.push_back({T.Offset, Directive::If}); +      break; + +    case pp_elif: +    case pp_else: { +      if (Offsets.empty()) +        return true; +      int PreviousOffset = Offsets.back().Offset; +      Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); +      Offsets.push_back({T.Offset, Directive::Else}); +      break; +    } + +    case pp_endif: { +      if (Offsets.empty()) +        return true; +      int PreviousOffset = Offsets.back().Offset; +      Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); +      do { +        Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; +        if (Kind == Directive::If) +          break; +      } while (!Offsets.empty()); +      break; +    } +    default: +      break; +    } +  } +  return false; +} + +bool clang::minimizeSourceToDependencyDirectives( +    StringRef Input, SmallVectorImpl<char> &Output, +    SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, +    SourceLocation InputSourceLoc) { +  Output.clear(); +  Tokens.clear(); +  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); +} diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp new file mode 100644 index 
000000000000..d44ef29c05d1 --- /dev/null +++ b/clang/lib/Lex/HeaderMap.cpp @@ -0,0 +1,242 @@ +//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the HeaderMap interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/HeaderMap.h" +#include "clang/Lex/HeaderMapTypes.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/Debug.h" +#include <cstring> +#include <memory> +using namespace clang; + +/// HashHMapKey - This is the 'well known' hash function required by the file +/// format, used to look up keys in the hash table.  The hash table uses simple +/// linear probing based on this function. +static inline unsigned HashHMapKey(StringRef Str) { +  unsigned Result = 0; +  const char *S = Str.begin(), *End = Str.end(); + +  for (; S != End; S++) +    Result += toLowercase(*S) * 13; +  return Result; +} + + + +//===----------------------------------------------------------------------===// +// Verification and Construction +//===----------------------------------------------------------------------===// + +/// HeaderMap::Create - This attempts to load the specified file as a header +/// map.  If it doesn't look like a HeaderMap, it gives up and returns null. +/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason +/// into the string error argument and returns null. 
+/// Create - Try to open \p FE as an on-disk header map.  Returns null if the
+/// file is too small, cannot be read, or fails the header sanity checks, so
+/// callers can silently fall back to treating it as an ordinary file.
+std::unique_ptr<HeaderMap> HeaderMap::Create(const FileEntry *FE,
+                                             FileManager &FM) {
+  // If the file is too small to be a header map, ignore it.
+  unsigned FileSize = FE->getSize();
+  if (FileSize <= sizeof(HMapHeader)) return nullptr;
+
+  auto FileBuffer = FM.getBufferForFile(FE);
+  if (!FileBuffer || !*FileBuffer)
+    return nullptr;
+  bool NeedsByteSwap;
+  if (!checkHeader(**FileBuffer, NeedsByteSwap))
+    return nullptr;
+  return std::unique_ptr<HeaderMap>(new HeaderMap(std::move(*FileBuffer), NeedsByteSwap));
+}
+
+/// checkHeader - Sanity-check \p File as a header map: verify the magic
+/// number and version (accepting either endianness), the reserved field, and
+/// that the advertised bucket array actually fits in the buffer.  On success
+/// \p NeedsByteSwap reports whether header/bucket words must be byte-swapped.
+bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File,
+                                bool &NeedsByteSwap) {
+  if (File.getBufferSize() <= sizeof(HMapHeader))
+    return false;
+  const char *FileStart = File.getBufferStart();
+
+  // We know the file is at least as big as the header, check it now.
+  const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
+
+  // Sniff it to see if it's a headermap by checking the magic number and
+  // version.
+  if (Header->Magic == HMAP_HeaderMagicNumber &&
+      Header->Version == HMAP_HeaderVersion)
+    NeedsByteSwap = false;
+  else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
+           Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
+    NeedsByteSwap = true;  // Mixed endianness headermap.
+  else
+    return false;  // Not a header map.
+
+  if (Header->Reserved != 0)
+    return false;
+
+  // Check the number of buckets.  It should be a power of two, and there
+  // should be enough space in the file for all of them.
+  uint32_t NumBuckets = NeedsByteSwap
+                            ? llvm::sys::getSwappedBytes(Header->NumBuckets)
+                            : Header->NumBuckets;
+  if (!llvm::isPowerOf2_32(NumBuckets))
+    return false;
+  if (File.getBufferSize() <
+      sizeof(HMapHeader) + sizeof(HMapBucket) * NumBuckets)
+    return false;
+
+  // Okay, everything looks good.
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  Utility Methods
+//===----------------------------------------------------------------------===//
+
+
+/// getFileName - Return the filename of the headermap.
+StringRef HeaderMapImpl::getFileName() const {
+  return FileBuffer->getBufferIdentifier();
+}
+
+/// Byte-swap \p X if this header map's endianness differs from the host's
+/// (as recorded by NeedsBSwap at construction time).
+unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const {
+  if (!NeedsBSwap) return X;
+  return llvm::ByteSwap_32(X);
+}
+
+/// getHeader - Return a reference to the file header, in unbyte-swapped form.
+/// This method cannot fail.
+const HMapHeader &HeaderMapImpl::getHeader() const {
+  // We know the file is at least as big as the header.  Return it.
+  return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
+}
+
+/// getBucket - Return the specified hash table bucket from the header map,
+/// bswap'ing its fields as appropriate.  If the bucket number is not valid,
+/// this return a bucket with an empty key (0).
+HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {
+  // checkHeader() guaranteed the full bucket array fits in the buffer, so an
+  // in-range BucketNo cannot read past the end.
+  assert(FileBuffer->getBufferSize() >=
+             sizeof(HMapHeader) + sizeof(HMapBucket) * BucketNo &&
+         "Expected bucket to be in range");
+
+  HMapBucket Result;
+  Result.Key = HMAP_EmptyBucketKey;
+
+  const HMapBucket *BucketArray =
+    reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
+                                        sizeof(HMapHeader));
+  const HMapBucket *BucketPtr = BucketArray+BucketNo;
+
+  // Load the values, bswapping as needed.
+  Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
+  Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
+  Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
+  return Result;
+}
+
+/// getString - Return the string at offset \p StrTabIdx in the string table,
+/// or None if the index is out of range or the string is not null-terminated
+/// inside the buffer (i.e. the file is truncated/corrupt).
+Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {
+  // Add the start of the string table to the idx.
+  StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
+
+  // Check for invalid index.
+  if (StrTabIdx >= FileBuffer->getBufferSize())
+    return None;
+
+  const char *Data = FileBuffer->getBufferStart() + StrTabIdx;
+  unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx;
+  unsigned Len = strnlen(Data, MaxLen);
+
+  // Check whether the buffer is null-terminated.
+  if (Len == MaxLen && Data[Len - 1])
+    return None;
+
+  return StringRef(Data, Len);
+}
+
+//===----------------------------------------------------------------------===//
+// The Main Drivers
+//===----------------------------------------------------------------------===//
+
+/// dump - Print the contents of this headermap to stderr.
+LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  llvm::dbgs() << "Header Map " << getFileName() << ":\n  " << NumBuckets
+               << ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n";
+
+  // Corrupt string indices are rendered as "<invalid>" rather than aborting
+  // the dump.
+  auto getStringOrInvalid = [this](unsigned Id) -> StringRef {
+    if (Optional<StringRef> S = getString(Id))
+      return *S;
+    return "<invalid>";
+  };
+
+  for (unsigned i = 0; i != NumBuckets; ++i) {
+    HMapBucket B = getBucket(i);
+    if (B.Key == HMAP_EmptyBucketKey) continue;
+
+    StringRef Key = getStringOrInvalid(B.Key);
+    StringRef Prefix = getStringOrInvalid(B.Prefix);
+    StringRef Suffix = getStringOrInvalid(B.Suffix);
+    llvm::dbgs() << "  " << i << ". " << Key << " -> '" << Prefix << "' '"
+                 << Suffix << "'\n";
+  }
+}
+
+/// LookupFile - Check to see if the specified relative filename is located in
+/// this HeaderMap.  If so, open it and return its FileEntry.
+Optional<FileEntryRef> HeaderMap::LookupFile(StringRef Filename,
+                                             FileManager &FM) const {
+
+  SmallString<1024> Path;
+  StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path);
+  if (Dest.empty())
+    return None;
+
+  return FM.getOptionalFileRef(Dest);
+}
+
+/// lookupFilename - Probe the hash table for \p Filename
+/// (case-insensitively).  On a hit, the mapped path (prefix + suffix) is
+/// materialized into \p DestPath and returned; an empty StringRef means the
+/// name is not in this header map.
+StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
+                                        SmallVectorImpl<char> &DestPath) const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  // Don't probe infinitely.  This should be checked before constructing.
+  assert(llvm::isPowerOf2_32(NumBuckets) && "Expected power of 2");
+
+  // Linearly probe the hash table.
+  for (unsigned Bucket = HashHMapKey(Filename);; ++Bucket) {
+    HMapBucket B = getBucket(Bucket & (NumBuckets-1));
+    if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss.
+
+    // See if the key matches.  If not, probe on.
+    Optional<StringRef> Key = getString(B.Key);
+    if (LLVM_UNLIKELY(!Key))
+      continue;
+    if (!Filename.equals_lower(*Key))
+      continue;
+
+    // If so, we have a match in the hash table.  Construct the destination
+    // path.
+    Optional<StringRef> Prefix = getString(B.Prefix);
+    Optional<StringRef> Suffix = getString(B.Suffix);
+
+    DestPath.clear();
+    // A matched bucket with a corrupt prefix/suffix string yields an empty
+    // path, which callers treat as a miss.
+    if (LLVM_LIKELY(Prefix && Suffix)) {
+      DestPath.append(Prefix->begin(), Prefix->end());
+      DestPath.append(Suffix->begin(), Suffix->end());
+    }
+    return StringRef(DestPath.begin(), DestPath.size());
+  }
+}
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
new file mode 100644
index 000000000000..f0c5900c8ce4
--- /dev/null
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -0,0 +1,1801 @@
+//===- HeaderSearch.cpp - Resolve Header File Locations -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the DirectoryLookup and HeaderSearch interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/DirectoryLookup.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/ModuleMap.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace clang;
+
+#define DEBUG_TYPE "file-search"
+
+ALWAYS_ENABLED_STATISTIC(NumIncluded, "Number of attempted #includes.");
+ALWAYS_ENABLED_STATISTIC(
+    NumMultiIncludeFileOptzn,
+    "Number of #includes skipped due to the multi-include optimization.");
+ALWAYS_ENABLED_STATISTIC(NumFrameworkLookups, "Number of framework lookups.");
+ALWAYS_ENABLED_STATISTIC(NumSubFrameworkLookups,
+                         "Number of subframework lookups.");
+
+/// Return the controlling (include-guard) macro for this header, resolving
+/// or refreshing it through \p External (e.g. a PCH/module file) when the
+/// identifier is only known by ID or has gone out of date.
+const IdentifierInfo *
+HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) {
+  if (ControllingMacro) {
+    if (ControllingMacro->isOutOfDate()) {
+      assert(External && "We must have an external source if we have a "
+                         "controlling macro that is out of date.");
+      External->updateOutOfDateIdentifier(
+          *const_cast<IdentifierInfo *>(ControllingMacro));
+    }
+    return ControllingMacro;
+  }
+
+  // Lazily deserialize the identifier from the external source, if we can.
+  if (!ControllingMacroID || !External)
+    return nullptr;
+
+  ControllingMacro = External->GetIdentifier(ControllingMacroID);
+  return ControllingMacro;
+}
+
+ExternalHeaderFileInfoSource::~ExternalHeaderFileInfoSource() = default;
+
+HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
+                           SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+                           const LangOptions &LangOpts,
+                           const TargetInfo *Target)
+    : HSOpts(std::move(HSOpts)), Diags(Diags),
+      FileMgr(SourceMgr.getFileManager()), FrameworkMap(64),
+      ModMap(SourceMgr, Diags, LangOpts, Target, *this) {}
+
+/// Print accumulated header-search statistics to stderr.
+void HeaderSearch::PrintStats() {
+  llvm::errs() << "\n*** HeaderSearch Stats:\n"
+               << FileInfo.size() << " files tracked.\n";
+  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+    NumOnceOnlyFiles += FileInfo[i].isImport;
+    if (MaxNumIncludes < FileInfo[i].NumIncludes)
+      MaxNumIncludes = FileInfo[i].NumIncludes;
+    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+  }
+  llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n"
+               << "  " << NumSingleIncludedFiles << " included exactly once.\n"
+               << "  " << MaxNumIncludes << " max times a file is included.\n";
+
+  llvm::errs() << "  " << NumIncluded << " #include/#include_next/#import.\n"
+               << "    " << NumMultiIncludeFileOptzn
+               << " #includes skipped due to the multi-include optimization.\n";
+
+  llvm::errs() << NumFrameworkLookups << " framework lookups.\n"
+               << NumSubFrameworkLookups << " subframework lookups.\n";
+}
+
+/// CreateHeaderMap - This method returns a HeaderMap for the specified
+/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
+const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
+  // We expect the number of headermaps to be small, and almost always empty.
+  // If it ever grows, use of a linear search should be re-evaluated.
+  if (!HeaderMaps.empty()) {
+    for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+      // Pointer equality comparison of FileEntries works because they are
+      // already uniqued by inode.
+      if (HeaderMaps[i].first == FE)
+        return HeaderMaps[i].second.get();
+  }
+
+  if (std::unique_ptr<HeaderMap> HM = HeaderMap::Create(FE, FileMgr)) {
+    HeaderMaps.emplace_back(FE, std::move(HM));
+    return HeaderMaps.back().second.get();
+  }
+
+  // Not a valid header map; callers treat null as "use the file normally".
+  return nullptr;
+}
+
+/// Get filenames for all registered header maps.
+void HeaderSearch::getHeaderMapFileNames(
+    SmallVectorImpl<std::string> &Names) const {
+  for (auto &HM : HeaderMaps)
+    Names.push_back(HM.first->getName());
+}
+
+/// Compute the module cache file name for \p Module, keyed off the module
+/// map used to uniquify it.
+std::string HeaderSearch::getCachedModuleFileName(Module *Module) {
+  const FileEntry *ModuleMap =
+      getModuleMap().getModuleMapFileForUniquing(Module);
+  return getCachedModuleFileName(Module->Name, ModuleMap->getName());
+}
+
+/// Find a prebuilt .pcm for \p ModuleName: first via the explicit
+/// name-to-file map, then (unless \p FileMapOnly) by probing each prebuilt
+/// module path for "<ModuleName>.pcm".  Returns "" when nothing is found.
+std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,
+                                                    bool FileMapOnly) {
+  // First check the module name to pcm file map.
+  auto i (HSOpts->PrebuiltModuleFiles.find(ModuleName));
+  if (i != HSOpts->PrebuiltModuleFiles.end())
+    return i->second;
+
+  if (FileMapOnly || HSOpts->PrebuiltModulePaths.empty())
+    return {};
+
+  // Then go through each prebuilt module directory and try to find the pcm
+  // file.
+  for (const std::string &Dir : HSOpts->PrebuiltModulePaths) {
+    SmallString<256> Result(Dir);
+    llvm::sys::fs::make_absolute(Result);
+    llvm::sys::path::append(Result, ModuleName + ".pcm");
+    if (getFileMgr().getFile(Result.str()))
+      return Result.str().str();
+  }
+  return {};
+}
+
+/// Compute the implicit module cache path for \p ModuleName: either
+/// "<cache>/<Name>.pcm" when hashing is disabled, or
+/// "<cache>/<Name>-<hash>.pcm" where the hash covers the canonicalized,
+/// lower-cased module map location.  Returns "" if there is no cache path.
+std::string HeaderSearch::getCachedModuleFileName(StringRef ModuleName,
+                                                  StringRef ModuleMapPath) {
+  // If we don't have a module cache path or aren't supposed to use one, we
+  // can't do anything.
+  if (getModuleCachePath().empty())
+    return {};
+
+  SmallString<256> Result(getModuleCachePath());
+  llvm::sys::fs::make_absolute(Result);
+
+  if (HSOpts->DisableModuleHash) {
+    llvm::sys::path::append(Result, ModuleName + ".pcm");
+  } else {
+    // Construct the name <ModuleName>-<hash of ModuleMapPath>.pcm which should
+    // ideally be globally unique to this particular module. Name collisions
+    // in the hash are safe (because any translation unit can only import one
+    // module with each name), but result in a loss of caching.
+    //
+    // To avoid false-negatives, we form as canonical a path as we can, and map
+    // to lower-case in case we're on a case-insensitive file system.
+    std::string Parent = llvm::sys::path::parent_path(ModuleMapPath);
+    if (Parent.empty())
+      Parent = ".";
+    auto Dir = FileMgr.getDirectory(Parent);
+    if (!Dir)
+      return {};
+    auto DirName = FileMgr.getCanonicalName(*Dir);
+    auto FileName = llvm::sys::path::filename(ModuleMapPath);
+
+    llvm::hash_code Hash =
+      llvm::hash_combine(DirName.lower(), FileName.lower());
+
+    // Base-36 keeps the hash suffix short and filesystem-safe.
+    SmallString<128> HashStr;
+    llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);
+    llvm::sys::path::append(Result, ModuleName + "-" + HashStr + ".pcm");
+  }
+  return Result.str().str();
+}
+
+/// Look up \p ModuleName, optionally searching the header search paths for
+/// module maps, and retrying with the "_Private"/"Private" suffix stripped to
+/// support the private-module naming conventions described below.
+Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch,
+                                   bool AllowExtraModuleMapSearch) {
+  // Look in the module map to determine if there is a module by this name.
+  Module *Module = ModMap.findModule(ModuleName);
+  if (Module || !AllowSearch || !HSOpts->ImplicitModuleMaps)
+    return Module;
+
+  StringRef SearchName = ModuleName;
+  Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+
+  // The facility for "private modules" -- adjacent, optional module maps named
+  // module.private.modulemap that are supposed to define private submodules --
+  // may have different flavors of names: FooPrivate, Foo_Private and Foo.Private.
+  //
+  // Foo.Private is now deprecated in favor of Foo_Private. Users of FooPrivate
+  // should also rename to Foo_Private. Representing private as submodules
+  // could force building unwanted dependencies into the parent module and cause
+  // dependency cycles.
+  if (!Module && SearchName.consume_back("_Private"))
+    Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+  if (!Module && SearchName.consume_back("Private"))
+    Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+  return Module;
+}
+
+/// Walk every header search directory looking for a module map that defines
+/// \p ModuleName: framework directories, the directory's own module map, a
+/// nested <dir>/<ModuleName> module map, and (for @import, i.e. when
+/// \p AllowExtraModuleMapSearch) all immediate subdirectory module maps.
+Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
+                                   bool AllowExtraModuleMapSearch) {
+  Module *Module = nullptr;
+
+  // Look through the various header search paths to load any available module
+  // maps, searching for a module map that describes this module.
+  for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+    if (SearchDirs[Idx].isFramework()) {
+      // Search for or infer a module map for a framework. Here we use
+      // SearchName rather than ModuleName, to permit finding private modules
+      // named FooPrivate in buggy frameworks named Foo.
+      SmallString<128> FrameworkDirName;
+      FrameworkDirName += SearchDirs[Idx].getFrameworkDir()->getName();
+      llvm::sys::path::append(FrameworkDirName, SearchName + ".framework");
+      if (auto FrameworkDir = FileMgr.getDirectory(FrameworkDirName)) {
+        bool IsSystem
+          = SearchDirs[Idx].getDirCharacteristic() != SrcMgr::C_User;
+        Module = loadFrameworkModule(ModuleName, *FrameworkDir, IsSystem);
+        if (Module)
+          break;
+      }
+    }
+
+    // FIXME: Figure out how header maps and module maps will work together.
+
+    // Only deal with normal search directories.
+    if (!SearchDirs[Idx].isNormalDir())
+      continue;
+
+    bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory();
+    // Search for a module map file in this directory.
+    if (loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem,
+                          /*IsFramework*/false) == LMM_NewlyLoaded) {
+      // We just loaded a module map file; check whether the module is
+      // available now.
+      Module = ModMap.findModule(ModuleName);
+      if (Module)
+        break;
+    }
+
+    // Search for a module map in a subdirectory with the same name as the
+    // module.
+    SmallString<128> NestedModuleMapDirName;
+    NestedModuleMapDirName = SearchDirs[Idx].getDir()->getName();
+    llvm::sys::path::append(NestedModuleMapDirName, ModuleName);
+    if (loadModuleMapFile(NestedModuleMapDirName, IsSystem,
+                          /*IsFramework*/false) == LMM_NewlyLoaded){
+      // If we just loaded a module map file, look for the module again.
+      Module = ModMap.findModule(ModuleName);
+      if (Module)
+        break;
+    }
+
+    // If we've already performed the exhaustive search for module maps in this
+    // search directory, don't do it again.
+    if (SearchDirs[Idx].haveSearchedAllModuleMaps())
+      continue;
+
+    // Load all module maps in the immediate subdirectories of this search
+    // directory if ModuleName was from @import.
+    if (AllowExtraModuleMapSearch)
+      loadSubdirectoryModuleMaps(SearchDirs[Idx]);
+
+    // Look again for the module.
+    Module = ModMap.findModule(ModuleName);
+    if (Module)
+      break;
+  }
+
+  return Module;
+}
+
+//===----------------------------------------------------------------------===//
+// File lookup within a DirectoryLookup scope
+//===----------------------------------------------------------------------===//
+
+/// getName - Return the directory or filename corresponding to this lookup
+/// object.
+StringRef DirectoryLookup::getName() const {
+  // FIXME: Use the name from \c DirectoryEntryRef.
+  if (isNormalDir())
+    return getDir()->getName();
+  if (isFramework())
+    return getFrameworkDir()->getName();
+  assert(isHeaderMap() && "Unknown DirectoryLookup");
+  return getHeaderMap()->getFileName();
+}
+
+/// Open \p FileName and, if a module map covers it, fill in
+/// \p SuggestedModule.  Returns None when the file does not exist (only
+/// surprising errors such as running out of file handles are diagnosed) or
+/// when module visibility rules forbid using the header.
+Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule(
+    StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir,
+    bool IsSystemHeaderDir, Module *RequestingModule,
+    ModuleMap::KnownHeader *SuggestedModule) {
+  // If we have a module map that might map this header, load it and
+  // check whether we'll have a suggestion for a module.
+  auto File = getFileMgr().getFileRef(FileName, /*OpenFile=*/true);
+  if (!File) {
+    // For rare, surprising errors (e.g. "out of file handles"), diag the EC
+    // message.
+    std::error_code EC = llvm::errorToErrorCode(File.takeError());
+    if (EC != llvm::errc::no_such_file_or_directory &&
+        EC != llvm::errc::invalid_argument &&
+        EC != llvm::errc::is_a_directory && EC != llvm::errc::not_a_directory) {
+      Diags.Report(IncludeLoc, diag::err_cannot_open_file)
+          << FileName << EC.message();
+    }
+    return None;
+  }
+
+  // If there is a module that corresponds to this header, suggest it.
+  if (!findUsableModuleForHeader(
+          &File->getFileEntry(), Dir ? Dir : File->getFileEntry().getDir(),
+          RequestingModule, SuggestedModule, IsSystemHeaderDir))
+    return None;
+
+  return *File;
+}
+
+/// LookupFile - Lookup the specified file in this search path, returning it
+/// if it exists or returning null if not.
+Optional<FileEntryRef> DirectoryLookup::LookupFile(
+    StringRef &Filename, HeaderSearch &HS, SourceLocation IncludeLoc,
+    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,
+    Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,
+    bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound,
+    bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName) const {
+  // Reset the out-parameters up front so callers can rely on them even on a
+  // miss.
+  InUserSpecifiedSystemFramework = false;
+  IsInHeaderMap = false;
+  MappedName.clear();
+
+  SmallString<1024> TmpDir;
+  if (isNormalDir()) {
+    // Concatenate the requested file onto the directory.
+    TmpDir = getDir()->getName();
+    llvm::sys::path::append(TmpDir, Filename);
+    if (SearchPath) {
+      StringRef SearchPathRef(getDir()->getName());
+      SearchPath->clear();
+      SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
+    }
+    if (RelativePath) {
+      RelativePath->clear();
+      RelativePath->append(Filename.begin(), Filename.end());
+    }
+
+    return HS.getFileAndSuggestModule(TmpDir, IncludeLoc, getDir(),
+                                      isSystemHeaderDirectory(),
+                                      RequestingModule, SuggestedModule);
+  }
+
+  if (isFramework())
+    return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath,
+                             RequestingModule, SuggestedModule,
+                             InUserSpecifiedSystemFramework, IsFrameworkFound);
+
+  assert(isHeaderMap() && "Unknown directory lookup");
+  const HeaderMap *HM = getHeaderMap();
+  SmallString<1024> Path;
+  StringRef Dest = HM->lookupFilename(Filename, Path);
+  if (Dest.empty())
+    return None;
+
+  IsInHeaderMap = true;
+
+  // Shared tail for both header-map hit paths below.
+  auto FixupSearchPath = [&]() {
+    if (SearchPath) {
+      StringRef SearchPathRef(getName());
+      SearchPath->clear();
+      SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
+    }
+    if (RelativePath) {
+      RelativePath->clear();
+      RelativePath->append(Filename.begin(), Filename.end());
+    }
+  };
+
+  // Check if the headermap maps the filename to a framework include
+  // ("Foo.h" -> "Foo/Foo.h"), in which case continue header lookup using the
+  // framework include.
+  if (llvm::sys::path::is_relative(Dest)) {
+    // Note: this rewrites the caller's Filename to the mapped spelling.
+    MappedName.append(Dest.begin(), Dest.end());
+    Filename = StringRef(MappedName.begin(), MappedName.size());
+    Optional<FileEntryRef> Result = HM->LookupFile(Filename, HS.getFileMgr());
+    if (Result) {
+      FixupSearchPath();
+      return *Result;
+    }
+  } else if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) {
+    FixupSearchPath();
+    return *Res;
+  }
+
+  return None;
+}
+
+/// Given a framework directory, find the top-most framework directory.
+///
+/// \param FileMgr The file manager to use for directory lookups.
+/// \param DirName The name of the framework directory.
+/// \param SubmodulePath Will be populated with the submodule path from the
+/// returned top-level module to the originally named framework.
+static const DirectoryEntry *
+getTopFrameworkDir(FileManager &FileMgr, StringRef DirName,
+                   SmallVectorImpl<std::string> &SubmodulePath) {
+  assert(llvm::sys::path::extension(DirName) == ".framework" &&
+         "Not a framework directory");
+
+  // Note: as an egregious but useful hack we use the real path here, because
+  // frameworks moving between top-level frameworks to embedded frameworks tend
+  // to be symlinked, and we base the logical structure of modules on the
+  // physical layout. In particular, we need to deal with crazy includes like
+  //
+  //   #include <Foo/Frameworks/Bar.framework/Headers/Wibble.h>
+  //
+  // where 'Bar' used to be embedded in 'Foo', is now a top-level framework
+  // which one should access with, e.g.,
+  //
+  //   #include <Bar/Wibble.h>
+  //
+  // Similar issues occur when a top-level framework has moved into an
+  // embedded framework.
+  const DirectoryEntry *TopFrameworkDir = nullptr;
+  if (auto TopFrameworkDirOrErr = FileMgr.getDirectory(DirName))
+    TopFrameworkDir = *TopFrameworkDirOrErr;
+
+  if (TopFrameworkDir)
+    DirName = FileMgr.getCanonicalName(TopFrameworkDir);
+  do {
+    // Get the parent directory name.
+    DirName = llvm::sys::path::parent_path(DirName);
+    if (DirName.empty())
+      break;
+
+    // Determine whether this directory exists.
+    auto Dir = FileMgr.getDirectory(DirName);
+    if (!Dir)
+      break;
+
+    // If this is a framework directory, then we're a subframework of this
+    // framework.
+    if (llvm::sys::path::extension(DirName) == ".framework") {
+      SubmodulePath.push_back(llvm::sys::path::stem(DirName));
+      TopFrameworkDir = *Dir;
+    }
+  } while (true);
+
+  return TopFrameworkDir;
+}
+
+/// Whether a module lookup should happen: either a module suggestion was
+/// requested, or the requesting module forbids undeclared includes.
+static bool needModuleLookup(Module *RequestingModule,
+                             bool HasSuggestedModule) {
+  return HasSuggestedModule ||
+         (RequestingModule && RequestingModule->NoUndeclaredIncludes);
+}
+
+/// DoFrameworkLookup - Do a lookup of the specified file in the current
+/// DirectoryLookup, which is a framework directory.
+Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(
+    StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath,
+    SmallVectorImpl<char> *RelativePath, Module *RequestingModule,
+    ModuleMap::KnownHeader *SuggestedModule,
+    bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound) const {
+  FileManager &FileMgr = HS.getFileMgr();
+
+  // Framework names must have a '/' in the filename.
+  size_t SlashPos = Filename.find('/');
+  if (SlashPos == StringRef::npos)
+    return None;
+
+  // Find out if this is the home for the specified framework, by checking
+  // HeaderSearch.  Possible answers are yes/no and unknown.
+  FrameworkCacheEntry &CacheEntry =
+    HS.LookupFrameworkCache(Filename.substr(0, SlashPos));
+
+  // If it is known and in some other directory, fail.
+  if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDir())
+    return None;
+
+  // Otherwise, construct the path to this framework dir.
+
+  // FrameworkName = "/System/Library/Frameworks/"
+  SmallString<1024> FrameworkName;
+  FrameworkName += getFrameworkDirRef()->getName();
+  if (FrameworkName.empty() || FrameworkName.back() != '/')
+    FrameworkName.push_back('/');
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa"
+  StringRef ModuleName(Filename.begin(), SlashPos);
+  FrameworkName += ModuleName;
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/"
+  FrameworkName += ".framework/";
+
+  // If the cache entry was unresolved, populate it now.
+  if (!CacheEntry.Directory) {
+    ++NumFrameworkLookups;
+
+    // If the framework dir doesn't exist, we fail.
+    auto Dir = FileMgr.getDirectory(FrameworkName);
+    if (!Dir)
+      return None;
+
+    // Otherwise, if it does, remember that this is the right direntry for this
+    // framework.
+    CacheEntry.Directory = getFrameworkDir();
+
+    // If this is a user search directory, check if the framework has been
+    // user-specified as a system framework.
+    if (getDirCharacteristic() == SrcMgr::C_User) {
+      SmallString<1024> SystemFrameworkMarker(FrameworkName);
+      SystemFrameworkMarker += ".system_framework";
+      if (llvm::sys::fs::exists(SystemFrameworkMarker)) {
+        CacheEntry.IsUserSpecifiedSystemFramework = true;
+      }
+    }
+  }
+
+  // Set out flags.
+  InUserSpecifiedSystemFramework = CacheEntry.IsUserSpecifiedSystemFramework;
+  IsFrameworkFound = CacheEntry.Directory;
+
+  if (RelativePath) {
+    RelativePath->clear();
+    RelativePath->append(Filename.begin()+SlashPos+1, Filename.end());
+  }
+
+  // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
+  unsigned OrigSize = FrameworkName.size();
+
+  FrameworkName += "Headers/";
+
+  if (SearchPath) {
+    SearchPath->clear();
+    // Without trailing '/'.
+    SearchPath->append(FrameworkName.begin(), FrameworkName.end()-1);
+  }
+
+  FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
+
+  auto File =
+      FileMgr.getOptionalFileRef(FrameworkName, /*OpenFile=*/!SuggestedModule);
+  if (!File) {
+    // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+    const char *Private = "Private";
+    FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+                         Private+strlen(Private));
+    if (SearchPath)
+      SearchPath->insert(SearchPath->begin()+OrigSize, Private,
+                         Private+strlen(Private));
+
+    File = FileMgr.getOptionalFileRef(FrameworkName,
+                                      /*OpenFile=*/!SuggestedModule);
+  }
+
+  // If we found the header and are allowed to suggest a module, do so now.
+  if (File && needModuleLookup(RequestingModule, SuggestedModule)) {
+    // Find the framework in which this header occurs.
+    StringRef FrameworkPath = File->getFileEntry().getDir()->getName();
+    bool FoundFramework = false;
+    do {
+      // Determine whether this directory exists.
+      auto Dir = FileMgr.getDirectory(FrameworkPath);
+      if (!Dir)
+        break;
+
+      // If this is a framework directory, then we're a subframework of this
+      // framework.
+      if (llvm::sys::path::extension(FrameworkPath) == ".framework") {
+        FoundFramework = true;
+        break;
+      }
+
+      // Get the parent directory name.
+      FrameworkPath = llvm::sys::path::parent_path(FrameworkPath);
+      if (FrameworkPath.empty())
+        break;
+    } while (true);
+
+    bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
+    if (FoundFramework) {
+      if (!HS.findUsableModuleForFrameworkHeader(
+              &File->getFileEntry(), FrameworkPath, RequestingModule,
+              SuggestedModule, IsSystem))
+        return None;
+    } else {
+      if (!HS.findUsableModuleForHeader(&File->getFileEntry(), getDir(),
+                                        RequestingModule, SuggestedModule,
+                                        IsSystem))
+        return None;
+    }
+  }
+  if (File)
+    return *File;
+  return None;
+}
+
+void HeaderSearch::setTarget(const TargetInfo &Target) {
+  ModMap.setTarget(Target);
+}
+
+//===----------------------------------------------------------------------===//
+// Header File Location.
+//===----------------------------------------------------------------------===//
+
+/// Return true with a diagnostic if the file that MSVC would have found
+/// fails to match the one that Clang would have found with MSVC header search
+/// disabled.
+static bool checkMSVCHeaderSearch(DiagnosticsEngine &Diags, +                                  const FileEntry *MSFE, const FileEntry *FE, +                                  SourceLocation IncludeLoc) { +  if (MSFE && FE != MSFE) { +    Diags.Report(IncludeLoc, diag::ext_pp_include_search_ms) << MSFE->getName(); +    return true; +  } +  return false; +} + +static const char *copyString(StringRef Str, llvm::BumpPtrAllocator &Alloc) { +  assert(!Str.empty()); +  char *CopyStr = Alloc.Allocate<char>(Str.size()+1); +  std::copy(Str.begin(), Str.end(), CopyStr); +  CopyStr[Str.size()] = '\0'; +  return CopyStr; +} + +static bool isFrameworkStylePath(StringRef Path, bool &IsPrivateHeader, +                                 SmallVectorImpl<char> &FrameworkName) { +  using namespace llvm::sys; +  path::const_iterator I = path::begin(Path); +  path::const_iterator E = path::end(Path); +  IsPrivateHeader = false; + +  // Detect different types of framework style paths: +  // +  //   ...Foo.framework/{Headers,PrivateHeaders} +  //   ...Foo.framework/Versions/{A,Current}/{Headers,PrivateHeaders} +  //   ...Foo.framework/Frameworks/Nested.framework/{Headers,PrivateHeaders} +  //   ...<other variations with 'Versions' like in the above path> +  // +  // and some other variations among these lines. 
+  int FoundComp = 0; +  while (I != E) { +    if (*I == "Headers") +      ++FoundComp; +    if (I->endswith(".framework")) { +      FrameworkName.append(I->begin(), I->end()); +      ++FoundComp; +    } +    if (*I == "PrivateHeaders") { +      ++FoundComp; +      IsPrivateHeader = true; +    } +    ++I; +  } + +  return !FrameworkName.empty() && FoundComp >= 2; +} + +static void +diagnoseFrameworkInclude(DiagnosticsEngine &Diags, SourceLocation IncludeLoc, +                         StringRef Includer, StringRef IncludeFilename, +                         const FileEntry *IncludeFE, bool isAngled = false, +                         bool FoundByHeaderMap = false) { +  bool IsIncluderPrivateHeader = false; +  SmallString<128> FromFramework, ToFramework; +  if (!isFrameworkStylePath(Includer, IsIncluderPrivateHeader, FromFramework)) +    return; +  bool IsIncludeePrivateHeader = false; +  bool IsIncludeeInFramework = isFrameworkStylePath( +      IncludeFE->getName(), IsIncludeePrivateHeader, ToFramework); + +  if (!isAngled && !FoundByHeaderMap) { +    SmallString<128> NewInclude("<"); +    if (IsIncludeeInFramework) { +      NewInclude += StringRef(ToFramework).drop_back(10); // drop .framework +      NewInclude += "/"; +    } +    NewInclude += IncludeFilename; +    NewInclude += ">"; +    Diags.Report(IncludeLoc, diag::warn_quoted_include_in_framework_header) +        << IncludeFilename +        << FixItHint::CreateReplacement(IncludeLoc, NewInclude); +  } + +  // Headers in Foo.framework/Headers should not include headers +  // from Foo.framework/PrivateHeaders, since this violates public/private +  // API boundaries and can cause modular dependency cycles. 
+  if (!IsIncluderPrivateHeader && IsIncludeeInFramework && +      IsIncludeePrivateHeader && FromFramework == ToFramework) +    Diags.Report(IncludeLoc, diag::warn_framework_include_private_from_public) +        << IncludeFilename; +} + +/// LookupFile - Given a "foo" or \<foo> reference, look up the indicated file, +/// return null on failure.  isAngled indicates whether the file reference is +/// for system \#include's or not (i.e. using <> instead of ""). Includers, if +/// non-empty, indicates where the \#including file(s) are, in case a relative +/// search is needed. Microsoft mode will pass all \#including files. +Optional<FileEntryRef> HeaderSearch::LookupFile( +    StringRef Filename, SourceLocation IncludeLoc, bool isAngled, +    const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, +    ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers, +    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, +    Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, +    bool *IsMapped, bool *IsFrameworkFound, bool SkipCache, +    bool BuildSystemModule) { +  if (IsMapped) +    *IsMapped = false; + +  if (IsFrameworkFound) +    *IsFrameworkFound = false; + +  if (SuggestedModule) +    *SuggestedModule = ModuleMap::KnownHeader(); + +  // If 'Filename' is absolute, check to see if it exists and no searching. +  if (llvm::sys::path::is_absolute(Filename)) { +    CurDir = nullptr; + +    // If this was an #include_next "/absolute/file", fail. +    if (FromDir) +      return None; + +    if (SearchPath) +      SearchPath->clear(); +    if (RelativePath) { +      RelativePath->clear(); +      RelativePath->append(Filename.begin(), Filename.end()); +    } +    // Otherwise, just return the file. 
    return getFileAndSuggestModule(Filename, IncludeLoc, nullptr,
                                   /*IsSystemHeaderDir*/false,
                                   RequestingModule, SuggestedModule);
  }

  // This is the header that MSVC's header search would have found.
  ModuleMap::KnownHeader MSSuggestedModule;
  const FileEntry *MSFE_FE = nullptr;
  StringRef MSFE_Name;

  // Unless disabled, check to see if the file is in the #includer's
  // directory.  This cannot be based on CurDir, because each includer could be
  // a #include of a subdirectory (#include "foo/bar.h") and a subsequent
  // include of "baz.h" should resolve to "whatever/foo/baz.h".
  // This search is not done for <> headers.
  if (!Includers.empty() && !isAngled && !NoCurDirSearch) {
    SmallString<1024> TmpDir;
    bool First = true;
    for (const auto &IncluderAndDir : Includers) {
      const FileEntry *Includer = IncluderAndDir.first;

      // Concatenate the requested file onto the directory.
      // FIXME: Portability.  Filename concatenation should be in sys::Path.
      TmpDir = IncluderAndDir.second->getName();
      TmpDir.push_back('/');
      TmpDir.append(Filename.begin(), Filename.end());

      // FIXME: We don't cache the result of getFileInfo across the call to
      // getFileAndSuggestModule, because it's a reference to an element of
      // a container that could be reallocated across this call.
      //
      // If we have no includer, that means we're processing a #include
      // from a module build. We should treat this as a system header if we're
      // building a [system] module.
      bool IncluderIsSystemHeader =
          Includer ? getFileInfo(Includer).DirInfo != SrcMgr::C_User :
          BuildSystemModule;
      if (Optional<FileEntryRef> FE = getFileAndSuggestModule(
              TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
              RequestingModule, SuggestedModule)) {
        if (!Includer) {
          assert(First && "only first includer can have no file");
          return FE;
        }

        // Leave CurDir unset.
        // This file is a system header or C++ unfriendly if the old file is.
        //
        // Note that we only use one of FromHFI/ToHFI at once, due to potential
        // reallocation of the underlying vector potentially making the first
        // reference binding dangling.
        HeaderFileInfo &FromHFI = getFileInfo(Includer);
        unsigned DirInfo = FromHFI.DirInfo;
        bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader;
        StringRef Framework = FromHFI.Framework;

        HeaderFileInfo &ToHFI = getFileInfo(&FE->getFileEntry());
        ToHFI.DirInfo = DirInfo;
        ToHFI.IndexHeaderMapHeader = IndexHeaderMapHeader;
        ToHFI.Framework = Framework;

        if (SearchPath) {
          StringRef SearchPathRef(IncluderAndDir.second->getName());
          SearchPath->clear();
          SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
        }
        if (RelativePath) {
          RelativePath->clear();
          RelativePath->append(Filename.begin(), Filename.end());
        }
        if (First) {
          diagnoseFrameworkInclude(Diags, IncludeLoc,
                                   IncluderAndDir.second->getName(), Filename,
                                   &FE->getFileEntry());
          return FE;
        }

        // Otherwise, we found the path via MSVC header search rules.  If
        // -Wmsvc-include is enabled, we have to keep searching to see if we
        // would've found this header in -I or -isystem directories.
        if (Diags.isIgnored(diag::ext_pp_include_search_ms, IncludeLoc)) {
          return FE;
        } else {
          MSFE_FE = &FE->getFileEntry();
          MSFE_Name = FE->getName();
          if (SuggestedModule) {
            MSSuggestedModule = *SuggestedModule;
            *SuggestedModule = ModuleMap::KnownHeader();
          }
          break;
        }
      }
      First = false;
    }
  }

  // Wrap the MSVC-found entry (if any) so it can be returned later if the
  // regular search paths would not have found the header.
  Optional<FileEntryRef> MSFE(MSFE_FE ? FileEntryRef(MSFE_Name, *MSFE_FE)
                                      : Optional<FileEntryRef>());

  CurDir = nullptr;

  // If this is a system #include, ignore the user #include locs.
  unsigned i = isAngled ? AngledDirIdx : 0;

  // If this is a #include_next request, start searching after the directory the
  // file was found in.
  if (FromDir)
    i = FromDir-&SearchDirs[0];

  // Cache all of the lookups performed by this method.  Many headers are
  // multiply included, and the "pragma once" optimization prevents them from
  // being relex/pp'd, but they would still have to search through a
  // (potentially huge) series of SearchDirs to find it.
  LookupFileCacheInfo &CacheLookup = LookupFileCache[Filename];

  // If the entry has been previously looked up, the first value will be
  // non-zero.  If the value is equal to i (the start point of our search), then
  // this is a matching hit.
  if (!SkipCache && CacheLookup.StartIdx == i+1) {
    // Skip querying potentially lots of directories for this lookup.
    i = CacheLookup.HitIdx;
    if (CacheLookup.MappedName) {
      Filename = CacheLookup.MappedName;
      if (IsMapped)
        *IsMapped = true;
    }
  } else {
    // Otherwise, this is the first query, or the previous query didn't match
    // our search start.  We will fill in our found location below, so prime the
    // start point value.
    CacheLookup.reset(/*StartIdx=*/i+1);
  }

  SmallString<64> MappedName;

  // Check each directory in sequence to see if it contains this file.
  for (; i != SearchDirs.size(); ++i) {
    bool InUserSpecifiedSystemFramework = false;
    bool IsInHeaderMap = false;
    bool IsFrameworkFoundInDir = false;
    Optional<FileEntryRef> File = SearchDirs[i].LookupFile(
        Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule,
        SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir,
        IsInHeaderMap, MappedName);
    if (!MappedName.empty()) {
      assert(IsInHeaderMap && "MappedName should come from a header map");
      CacheLookup.MappedName =
          copyString(MappedName, LookupFileCache.getAllocator());
    }
    if (IsMapped)
      // A filename is mapped when a header map remapped it to a relative path
      // used in subsequent header search or to an absolute path pointing to an
      // existing file.
      *IsMapped |= (!MappedName.empty() || (IsInHeaderMap && File));
    if (IsFrameworkFound)
      // Because we keep a filename remapped for subsequent search directory
      // lookups, ignore IsFrameworkFoundInDir after the first remapping and not
      // just for remapping in a current search directory.
      *IsFrameworkFound |= (IsFrameworkFoundInDir && !CacheLookup.MappedName);
    if (!File)
      continue;

    CurDir = &SearchDirs[i];

    // This file is a system header or C++ unfriendly if the dir is.
    HeaderFileInfo &HFI = getFileInfo(&File->getFileEntry());
    HFI.DirInfo = CurDir->getDirCharacteristic();

    // If the directory characteristic is User but this framework was
    // user-specified to be treated as a system framework, promote the
    // characteristic.
    if (HFI.DirInfo == SrcMgr::C_User && InUserSpecifiedSystemFramework)
      HFI.DirInfo = SrcMgr::C_System;

    // If the filename matches a known system header prefix, override
    // whether the file is a system header.
    for (unsigned j = SystemHeaderPrefixes.size(); j; --j) {
      if (Filename.startswith(SystemHeaderPrefixes[j-1].first)) {
        HFI.DirInfo = SystemHeaderPrefixes[j-1].second ? SrcMgr::C_System
                                                       : SrcMgr::C_User;
        break;
      }
    }

    // If this file is found in a header map and uses the framework style of
    // includes, then this header is part of a framework we're building.
    if (CurDir->isIndexHeaderMap()) {
      size_t SlashPos = Filename.find('/');
      if (SlashPos != StringRef::npos) {
        HFI.IndexHeaderMapHeader = 1;
        HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(),
                                                         SlashPos));
      }
    }

    if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
                              &File->getFileEntry(), IncludeLoc)) {
      if (SuggestedModule)
        *SuggestedModule = MSSuggestedModule;
      return MSFE;
    }

    // IsMapped may be null; treat that as "not found by header map".
    bool FoundByHeaderMap = !IsMapped ? false : *IsMapped;
    if (!Includers.empty())
      diagnoseFrameworkInclude(
          Diags, IncludeLoc, Includers.front().second->getName(), Filename,
          &File->getFileEntry(), isAngled, FoundByHeaderMap);

    // Remember this location for the next lookup we do.
    CacheLookup.HitIdx = i;
    return File;
  }

  // If we are including a file with a quoted include "foo.h" from inside
  // a header in a framework that is currently being built, and we couldn't
  // resolve "foo.h" any other way, change the include to <Foo/foo.h>, where
  // "Foo" is the name of the framework in which the including header was found.
  if (!Includers.empty() && Includers.front().first && !isAngled &&
      Filename.find('/') == StringRef::npos) {
    HeaderFileInfo &IncludingHFI = getFileInfo(Includers.front().first);
    if (IncludingHFI.IndexHeaderMapHeader) {
      SmallString<128> ScratchFilename;
      ScratchFilename += IncludingHFI.Framework;
      ScratchFilename += '/';
      ScratchFilename += Filename;

      // Retry the lookup as an angled, framework-qualified include.
      Optional<FileEntryRef> File = LookupFile(
          ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, CurDir,
          Includers.front(), SearchPath, RelativePath, RequestingModule,
          SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr);

      if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
                                File ? &File->getFileEntry() : nullptr,
                                IncludeLoc)) {
        if (SuggestedModule)
          *SuggestedModule = MSSuggestedModule;
        return MSFE;
      }

      // Propagate the hit index of the rewritten name to the original name's
      // cache entry so future lookups of "foo.h" hit directly.
      LookupFileCacheInfo &CacheLookup = LookupFileCache[Filename];
      CacheLookup.HitIdx = LookupFileCache[ScratchFilename].HitIdx;
      // FIXME: SuggestedModule.
      return File;
    }
  }

  if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
                            nullptr, IncludeLoc)) {
    if (SuggestedModule)
      *SuggestedModule = MSSuggestedModule;
    return MSFE;
  }

  // Otherwise, didn't find it. Remember we didn't find this.
  CacheLookup.HitIdx = SearchDirs.size();
  return None;
}

/// LookupSubframeworkHeader - Look up a subframework for the specified
/// \#include file.  For example, if \#include'ing <HIToolbox/HIToolbox.h> from
/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox
/// is a subframework within Carbon.framework.  If so, return the FileEntry
/// for the designated file, otherwise return null.
+Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( +    StringRef Filename, const FileEntry *ContextFileEnt, +    SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, +    Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) { +  assert(ContextFileEnt && "No context file?"); + +  // Framework names must have a '/' in the filename.  Find it. +  // FIXME: Should we permit '\' on Windows? +  size_t SlashPos = Filename.find('/'); +  if (SlashPos == StringRef::npos) +    return None; + +  // Look up the base framework name of the ContextFileEnt. +  StringRef ContextName = ContextFileEnt->getName(); + +  // If the context info wasn't a framework, couldn't be a subframework. +  const unsigned DotFrameworkLen = 10; +  auto FrameworkPos = ContextName.find(".framework"); +  if (FrameworkPos == StringRef::npos || +      (ContextName[FrameworkPos + DotFrameworkLen] != '/' && +       ContextName[FrameworkPos + DotFrameworkLen] != '\\')) +    return None; + +  SmallString<1024> FrameworkName(ContextName.data(), ContextName.data() + +                                                          FrameworkPos + +                                                          DotFrameworkLen + 1); + +  // Append Frameworks/HIToolbox.framework/ +  FrameworkName += "Frameworks/"; +  FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos); +  FrameworkName += ".framework/"; + +  auto &CacheLookup = +      *FrameworkMap.insert(std::make_pair(Filename.substr(0, SlashPos), +                                          FrameworkCacheEntry())).first; + +  // Some other location? +  if (CacheLookup.second.Directory && +      CacheLookup.first().size() == FrameworkName.size() && +      memcmp(CacheLookup.first().data(), &FrameworkName[0], +             CacheLookup.first().size()) != 0) +    return None; + +  // Cache subframework. 
+  if (!CacheLookup.second.Directory) { +    ++NumSubFrameworkLookups; + +    // If the framework dir doesn't exist, we fail. +    auto Dir = FileMgr.getDirectory(FrameworkName); +    if (!Dir) +      return None; + +    // Otherwise, if it does, remember that this is the right direntry for this +    // framework. +    CacheLookup.second.Directory = *Dir; +  } + + +  if (RelativePath) { +    RelativePath->clear(); +    RelativePath->append(Filename.begin()+SlashPos+1, Filename.end()); +  } + +  // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" +  SmallString<1024> HeadersFilename(FrameworkName); +  HeadersFilename += "Headers/"; +  if (SearchPath) { +    SearchPath->clear(); +    // Without trailing '/'. +    SearchPath->append(HeadersFilename.begin(), HeadersFilename.end()-1); +  } + +  HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); +  auto File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true); +  if (!File) { +    // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" +    HeadersFilename = FrameworkName; +    HeadersFilename += "PrivateHeaders/"; +    if (SearchPath) { +      SearchPath->clear(); +      // Without trailing '/'. +      SearchPath->append(HeadersFilename.begin(), HeadersFilename.end()-1); +    } + +    HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); +    File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true); + +    if (!File) +      return None; +  } + +  // This file is a system header or C++ unfriendly if the old file is. +  // +  // Note that the temporary 'DirInfo' is required here, as either call to +  // getFileInfo could resize the vector and we don't want to rely on order +  // of evaluation. 
+  unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo; +  getFileInfo(&File->getFileEntry()).DirInfo = DirInfo; + +  FrameworkName.pop_back(); // remove the trailing '/' +  if (!findUsableModuleForFrameworkHeader(&File->getFileEntry(), FrameworkName, +                                          RequestingModule, SuggestedModule, +                                          /*IsSystem*/ false)) +    return None; + +  return *File; +} + +//===----------------------------------------------------------------------===// +// File Info Management. +//===----------------------------------------------------------------------===// + +/// Merge the header file info provided by \p OtherHFI into the current +/// header file info (\p HFI) +static void mergeHeaderFileInfo(HeaderFileInfo &HFI, +                                const HeaderFileInfo &OtherHFI) { +  assert(OtherHFI.External && "expected to merge external HFI"); + +  HFI.isImport |= OtherHFI.isImport; +  HFI.isPragmaOnce |= OtherHFI.isPragmaOnce; +  HFI.isModuleHeader |= OtherHFI.isModuleHeader; +  HFI.NumIncludes += OtherHFI.NumIncludes; + +  if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { +    HFI.ControllingMacro = OtherHFI.ControllingMacro; +    HFI.ControllingMacroID = OtherHFI.ControllingMacroID; +  } + +  HFI.DirInfo = OtherHFI.DirInfo; +  HFI.External = (!HFI.IsValid || HFI.External); +  HFI.IsValid = true; +  HFI.IndexHeaderMapHeader = OtherHFI.IndexHeaderMapHeader; + +  if (HFI.Framework.empty()) +    HFI.Framework = OtherHFI.Framework; +} + +/// getFileInfo - Return the HeaderFileInfo structure for the specified +/// FileEntry. +HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { +  if (FE->getUID() >= FileInfo.size()) +    FileInfo.resize(FE->getUID() + 1); + +  HeaderFileInfo *HFI = &FileInfo[FE->getUID()]; +  // FIXME: Use a generation count to check whether this is really up to date. 
  // Lazily pull in header info from the external source (e.g. a loaded AST
  // file) the first time this entry is requested.
  if (ExternalSource && !HFI->Resolved) {
    HFI->Resolved = true;
    auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);

    // Re-fetch the pointer: GetHeaderFileInfo may have resized FileInfo.
    HFI = &FileInfo[FE->getUID()];
    if (ExternalHFI.External)
      mergeHeaderFileInfo(*HFI, ExternalHFI);
  }

  HFI->IsValid = true;
  // We have local information about this header file, so it's no longer
  // strictly external.
  HFI->External = false;
  return *HFI;
}

/// Return the HeaderFileInfo for \p FE if one already exists (possibly
/// resolving it from the external source), or null if there is none.  When
/// \p WantExternal is false, purely-external entries are not returned.
const HeaderFileInfo *
HeaderSearch::getExistingFileInfo(const FileEntry *FE,
                                  bool WantExternal) const {
  // If we have an external source, ensure we have the latest information.
  // FIXME: Use a generation count to check whether this is really up to date.
  HeaderFileInfo *HFI;
  if (ExternalSource) {
    if (FE->getUID() >= FileInfo.size()) {
      if (!WantExternal)
        return nullptr;
      FileInfo.resize(FE->getUID() + 1);
    }

    HFI = &FileInfo[FE->getUID()];
    if (!WantExternal && (!HFI->IsValid || HFI->External))
      return nullptr;
    if (!HFI->Resolved) {
      HFI->Resolved = true;
      auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);

      // Re-fetch the pointer: GetHeaderFileInfo may have resized FileInfo.
      HFI = &FileInfo[FE->getUID()];
      if (ExternalHFI.External)
        mergeHeaderFileInfo(*HFI, ExternalHFI);
    }
  } else if (FE->getUID() >= FileInfo.size()) {
    return nullptr;
  } else {
    HFI = &FileInfo[FE->getUID()];
  }

  if (!HFI->IsValid || (HFI->External && !WantExternal))
    return nullptr;

  return HFI;
}

/// Return true if this file has any form of multiple-include protection
/// recorded (#pragma once, #import, or a controlling #ifndef macro).
bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {
  // Check if we've ever seen this file as a header.
  if (auto *HFI = getExistingFileInfo(File))
    return HFI->isPragmaOnce || HFI->isImport || HFI->ControllingMacro ||
           HFI->ControllingMacroID;
  return false;
}

/// Record that \p FE is a (modular or textual) module header, without
/// needlessly marking its info as locally-owned when nothing changes.
void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE,
                                        ModuleMap::ModuleHeaderRole Role,
                                        bool isCompilingModuleHeader) {
  bool isModularHeader = !(Role & ModuleMap::TextualHeader);

  // Don't mark the file info as non-external if there's nothing to change.
  if (!isCompilingModuleHeader) {
    if (!isModularHeader)
      return;
    auto *HFI = getExistingFileInfo(FE);
    if (HFI && HFI->isModuleHeader)
      return;
  }

  auto &HFI = getFileInfo(FE);
  HFI.isModuleHeader |= isModularHeader;
  HFI.isCompilingModuleHeader |= isCompilingModuleHeader;
}

/// Decide whether an #include/#import of \p File should actually enter the
/// file, applying the #import/#pragma once and controlling-macro
/// multiple-include optimizations.
bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
                                          const FileEntry *File, bool isImport,
                                          bool ModulesEnabled, Module *M) {
  ++NumIncluded; // Count # of attempted #includes.

  // Get information about this file.
  HeaderFileInfo &FileInfo = getFileInfo(File);

  // FIXME: this is a workaround for the lack of proper modules-aware support
  // for #import / #pragma once
  auto TryEnterImported = [&]() -> bool {
    if (!ModulesEnabled)
      return false;
    // Ensure FileInfo bits are up to date.
    ModMap.resolveHeaderDirectives(File);
    // Modules with builtins are special; multiple modules use builtins as
    // modular headers, example:
    //
    //    module stddef { header "stddef.h" export * }
    //
    // After module map parsing, this expands to:
    //
    //    module stddef {
    //      header "/path_to_builtin_dirs/stddef.h"
    //      textual "stddef.h"
    //    }
    //
    // It's common that libc++ and system modules will both define such
    // submodules. Make sure cached results for a builtin header won't
    // prevent other builtin modules to potentially enter the builtin header.
    // Note that builtins are header guarded and the decision to actually
    // enter them is postponed to the controlling macros logic below.
    bool TryEnterHdr = false;
    if (FileInfo.isCompilingModuleHeader && FileInfo.isModuleHeader)
      TryEnterHdr = File->getDir() == ModMap.getBuiltinDir() &&
                    ModuleMap::isBuiltinHeader(
                        llvm::sys::path::filename(File->getName()));

    // Textual headers can be #imported from different modules. Since ObjC
    // headers find in the wild might rely only on #import and do not contain
    // controlling macros, be conservative and only try to enter textual headers
    // if such macro is present.
    if (!FileInfo.isModuleHeader &&
        FileInfo.getControllingMacro(ExternalLookup))
      TryEnterHdr = true;
    return TryEnterHdr;
  };

  // If this is a #import directive, check that we have not already imported
  // this header.
  if (isImport) {
    // If this has already been imported, don't import it again.
    FileInfo.isImport = true;

    // Has this already been #import'ed or #include'd?
    if (FileInfo.NumIncludes && !TryEnterImported())
      return false;
  } else {
    // Otherwise, if this is a #include of a file that was previously #import'd
    // or if this is the second #include of a #pragma once file, ignore it.
    if (FileInfo.isImport && !TryEnterImported())
      return false;
  }

  // Next, check to see if the file is wrapped with #ifndef guards.  If so, and
  // if the macro that guards it is defined, we know the #include has no effect.
  if (const IdentifierInfo *ControllingMacro
      = FileInfo.getControllingMacro(ExternalLookup)) {
    // If the header corresponds to a module, check whether the macro is already
    // defined in that module rather than checking in the current set of visible
    // modules.
    if (M ? PP.isMacroDefinedInLocalModule(ControllingMacro, M)
          : PP.isMacroDefined(ControllingMacro)) {
      ++NumMultiIncludeFileOptzn;
      return false;
    }
  }

  // Increment the number of times this file has been included.
  ++FileInfo.NumIncludes;

  return true;
}

/// Approximate the heap memory used by the header-search data structures.
size_t HeaderSearch::getTotalMemory() const {
  return SearchDirs.capacity()
    + llvm::capacity_in_bytes(FileInfo)
    + llvm::capacity_in_bytes(HeaderMaps)
    + LookupFileCache.getAllocator().getTotalMemory()
    + FrameworkMap.getAllocator().getTotalMemory();
}

/// Intern \p Framework into FrameworkNames and return the stable StringRef.
StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
  return FrameworkNames.insert(Framework).first->first();
}

/// Walk up the directory tree from \p FileName looking for a module map,
/// loading the first one found and caching the answer for every directory
/// stepped through.
bool HeaderSearch::hasModuleMap(StringRef FileName,
                                const DirectoryEntry *Root,
                                bool IsSystem) {
  if (!HSOpts->ImplicitModuleMaps)
    return false;

  SmallVector<const DirectoryEntry *, 2> FixUpDirectories;

  StringRef DirName = FileName;
  do {
    // Get the parent directory name.
    DirName = llvm::sys::path::parent_path(DirName);
    if (DirName.empty())
      return false;

    // Determine whether this directory exists.
    auto Dir = FileMgr.getDirectory(DirName);
    if (!Dir)
      return false;

    // Try to load the module map file in this directory.
    switch (loadModuleMapFile(*Dir, IsSystem,
                              llvm::sys::path::extension((*Dir)->getName()) ==
                                  ".framework")) {
    case LMM_NewlyLoaded:
    case LMM_AlreadyLoaded:
      // Success. All of the directories we stepped through inherit this module
      // map file.
      for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I)
        DirectoryHasModuleMap[FixUpDirectories[I]] = true;
      return true;

    case LMM_NoDirectory:
    case LMM_InvalidModuleMap:
      break;
    }

    // If we hit the top of our search, we're done.
    if (*Dir == Root)
      return false;

    // Keep track of all of the directories we checked, so we can mark them as
    // having module maps if we eventually do find a module map.
    FixUpDirectories.push_back(*Dir);
  } while (true);
}

/// Find the module that owns \p File, first letting the external source
/// populate its header info so the module-map answer is complete.
ModuleMap::KnownHeader
HeaderSearch::findModuleForHeader(const FileEntry *File,
                                  bool AllowTextual) const {
  if (ExternalSource) {
    // Make sure the external source has handled header info about this file,
    // which includes whether the file is part of a module.
    (void)getExistingFileInfo(File);
  }
  return ModMap.findModuleForHeader(File, AllowTextual);
}

/// Fill in \p SuggestedModule for \p File and check that the requesting
/// module is allowed to use it.  Returns false only when the requesting
/// module has [no_undeclared_includes] and does not directly use the owning
/// module.
static bool suggestModule(HeaderSearch &HS, const FileEntry *File,
                          Module *RequestingModule,
                          ModuleMap::KnownHeader *SuggestedModule) {
  ModuleMap::KnownHeader Module =
      HS.findModuleForHeader(File, /*AllowTextual*/true);
  if (SuggestedModule)
    // Textual headers are not suggested as modules; report "no module".
    *SuggestedModule = (Module.getRole() & ModuleMap::TextualHeader)
                           ? ModuleMap::KnownHeader()
                           : Module;

  // If this module specifies [no_undeclared_includes], we cannot find any
  // file that's in a non-dependency module.
  if (RequestingModule && Module && RequestingModule->NoUndeclaredIncludes) {
    HS.getModuleMap().resolveUses(RequestingModule, /*Complain*/false);
    if (!RequestingModule->directlyUses(Module.getModule())) {
      return false;
    }
  }

  return true;
}

/// Suggest a module for a header found in a regular search directory,
/// loading any module map that governs it first.
bool HeaderSearch::findUsableModuleForHeader(
    const FileEntry *File, const DirectoryEntry *Root, Module *RequestingModule,
    ModuleMap::KnownHeader *SuggestedModule, bool IsSystemHeaderDir) {
  if (File && needModuleLookup(RequestingModule, SuggestedModule)) {
    // If there is a module that corresponds to this header, suggest it.
    hasModuleMap(File->getName(), Root, IsSystemHeaderDir);
    return suggestModule(*this, File, RequestingModule, SuggestedModule);
  }
  return true;
}

/// Suggest a module for a header found inside a framework, loading the
/// top-level framework's module first.
bool HeaderSearch::findUsableModuleForFrameworkHeader(
    const FileEntry *File, StringRef FrameworkName, Module *RequestingModule,
    ModuleMap::KnownHeader *SuggestedModule, bool IsSystemFramework) {
  // If we're supposed to suggest a module, look for one now.
  if (needModuleLookup(RequestingModule, SuggestedModule)) {
    // Find the top-level framework based on this framework.
    SmallVector<std::string, 4> SubmodulePath;
    const DirectoryEntry *TopFrameworkDir
      = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath);

    // Determine the name of the top-level framework.
    StringRef ModuleName = llvm::sys::path::stem(TopFrameworkDir->getName());

    // Load this framework module. If that succeeds, find the suggested module
    // for this header, if any.
    loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystemFramework);

    // FIXME: This can find a module not part of ModuleName, which is
    // important so that we're consistent about whether this header
    // corresponds to a module. Possibly we should lock down framework modules
    // so that this is not possible.
    return suggestModule(*this, File, RequestingModule, SuggestedModule);
  }
  return true;
}

/// Given a loaded module map file, return its sibling "private" module map
/// (module_private.map / module.private.modulemap) if one exists on disk.
static const FileEntry *getPrivateModuleMap(const FileEntry *File,
                                            FileManager &FileMgr) {
  StringRef Filename = llvm::sys::path::filename(File->getName());
  SmallString<128>  PrivateFilename(File->getDir()->getName());
  if (Filename == "module.map")
    llvm::sys::path::append(PrivateFilename, "module_private.map");
  else if (Filename == "module.modulemap")
    llvm::sys::path::append(PrivateFilename, "module.private.modulemap");
  else
    return nullptr;
  if (auto File = FileMgr.getFile(PrivateFilename))
    return *File;
  return nullptr;
}

/// Parse the given module map file.  Returns true on error.
bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem,
                                     FileID ID, unsigned *Offset,
                                     StringRef OriginalModuleMapFile) {
  // Find the directory for the module. For frameworks, that may require going
  // up from the 'Modules' directory.
  const DirectoryEntry *Dir = nullptr;
  if (getHeaderSearchOpts().ModuleMapFileHomeIsCwd) {
    if (auto DirOrErr = FileMgr.getDirectory("."))
      Dir = *DirOrErr;
  } else {
    if (!OriginalModuleMapFile.empty()) {
      // We're building a preprocessed module map. Find or invent the directory
      // that it originally occupied.
      auto DirOrErr = FileMgr.getDirectory(
          llvm::sys::path::parent_path(OriginalModuleMapFile));
      if (DirOrErr) {
        Dir = *DirOrErr;
      } else {
        // The original directory no longer exists; invent it as a virtual
        // file so the module map still has a home directory.
        auto *FakeFile = FileMgr.getVirtualFile(OriginalModuleMapFile, 0, 0);
        Dir = FakeFile->getDir();
      }
    } else {
      Dir = File->getDir();
    }

    // For Foo.framework/Modules/module.modulemap, the module's home is the
    // framework directory itself, not the 'Modules' subdirectory.
    StringRef DirName(Dir->getName());
    if (llvm::sys::path::filename(DirName) == "Modules") {
      DirName = llvm::sys::path::parent_path(DirName);
      if (DirName.endswith(".framework"))
        if (auto DirOrErr = FileMgr.getDirectory(DirName))
          Dir = *DirOrErr;
      // FIXME: This assert can fail if there's a race between the above check
      // and the removal of the directory.
      assert(Dir && "parent must exist");
    }
  }

  switch (loadModuleMapFileImpl(File, IsSystem, Dir, ID, Offset)) {
  case LMM_AlreadyLoaded:
  case LMM_NewlyLoaded:
    return false;
  case LMM_NoDirectory:
  case LMM_InvalidModuleMap:
    return true;
  }
  llvm_unreachable("Unknown load module map result");
}

/// Parse \p File as a module map (plus its private sibling, if any), caching
/// the result so each module map is parsed at most once.
HeaderSearch::LoadModuleMapResult
HeaderSearch::loadModuleMapFileImpl(const FileEntry *File, bool IsSystem,
                                    const DirectoryEntry *Dir, FileID ID,
                                    unsigned *Offset) {
  assert(File && "expected FileEntry");

  // Check whether we've already loaded this module map, and mark it as being
  // loaded in case we recursively try to load it from itself.
  auto AddResult = LoadedModuleMaps.insert(std::make_pair(File, true));
  if (!AddResult.second)
    return AddResult.first->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;

  if (ModMap.parseModuleMapFile(File, IsSystem, Dir, ID, Offset)) {
    LoadedModuleMaps[File] = false;
    return LMM_InvalidModuleMap;
  }

  // Try to load a corresponding private module map.
  if (const FileEntry *PMMFile = getPrivateModuleMap(File, FileMgr)) {
    if (ModMap.parseModuleMapFile(PMMFile, IsSystem, Dir)) {
      LoadedModuleMaps[File] = false;
      return LMM_InvalidModuleMap;
    }
  }

  // This directory has a module map.
  return LMM_NewlyLoaded;
}

/// Return the module map file that governs \p Dir, or null if there is none
/// (or implicit module maps are disabled).
const FileEntry *
HeaderSearch::lookupModuleMapFile(const DirectoryEntry *Dir, bool IsFramework) {
  if (!HSOpts->ImplicitModuleMaps)
    return nullptr;
  // For frameworks, the preferred spelling is Modules/module.modulemap, but
  // module.map at the framework root is also accepted.
  SmallString<128> ModuleMapFileName(Dir->getName());
  if (IsFramework)
    llvm::sys::path::append(ModuleMapFileName, "Modules");
  llvm::sys::path::append(ModuleMapFileName, "module.modulemap");
  if (auto F = FileMgr.getFile(ModuleMapFileName))
    return *F;

  // Continue to allow module.map
  ModuleMapFileName = Dir->getName();
  llvm::sys::path::append(ModuleMapFileName, "module.map");
  if (auto F = FileMgr.getFile(ModuleMapFileName))
    return *F;
  return nullptr;
}

/// Load (or infer) the module for the framework named \p Name rooted at
/// \p Dir, returning it if found.
Module *HeaderSearch::loadFrameworkModule(StringRef Name,
                                          const DirectoryEntry *Dir,
                                          bool IsSystem) {
  if (Module *Module = ModMap.findModule(Name))
    return Module;

  // Try to load a module map file.
  switch (loadModuleMapFile(Dir, IsSystem, /*IsFramework*/true)) {
  case LMM_InvalidModuleMap:
    // Try to infer a module map from the framework directory.
    if (HSOpts->ImplicitModuleMaps)
      ModMap.inferFrameworkModule(Dir, IsSystem, /*Parent=*/nullptr);
    break;

  case LMM_AlreadyLoaded:
  case LMM_NoDirectory:
    return nullptr;

  case LMM_NewlyLoaded:
    break;
  }

  return ModMap.findModule(Name);
}

/// Convenience overload: resolve \p DirName to a DirectoryEntry and load its
/// module map.
HeaderSearch::LoadModuleMapResult
HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem,
                                bool IsFramework) {
  if (auto Dir = FileMgr.getDirectory(DirName))
    return loadModuleMapFile(*Dir, IsSystem, IsFramework);

  return LMM_NoDirectory;
}

/// Load the module map governing \p Dir, consulting and updating the
/// per-directory cache.
HeaderSearch::LoadModuleMapResult
HeaderSearch::loadModuleMapFile(const DirectoryEntry *Dir, bool IsSystem,
                                bool IsFramework) {
  auto KnownDir = DirectoryHasModuleMap.find(Dir);
  if (KnownDir != DirectoryHasModuleMap.end())
    return KnownDir->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;

  if (const FileEntry *ModuleMapFile = lookupModuleMapFile(Dir, IsFramework)) {
    LoadModuleMapResult Result =
        loadModuleMapFileImpl(ModuleMapFile, IsSystem, Dir);
    // Add Dir explicitly in case ModuleMapFile is in a subdirectory.
    // E.g. Foo.framework/Modules/module.modulemap
    //      ^Dir                  ^ModuleMapFile
    if (Result == LMM_NewlyLoaded)
      DirectoryHasModuleMap[Dir] = true;
    else if (Result == LMM_InvalidModuleMap)
      DirectoryHasModuleMap[Dir] = false;
    return Result;
  }
  return LMM_InvalidModuleMap;
}

/// Collect every module known via the search path (continues past this view).
void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
  Modules.clear();

  if (HSOpts->ImplicitModuleMaps) {
    // Load module maps for each of the header search directories.
+    for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { +      bool IsSystem = SearchDirs[Idx].isSystemHeaderDirectory(); +      if (SearchDirs[Idx].isFramework()) { +        std::error_code EC; +        SmallString<128> DirNative; +        llvm::sys::path::native(SearchDirs[Idx].getFrameworkDir()->getName(), +                                DirNative); + +        // Search each of the ".framework" directories to load them as modules. +        llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); +        for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), +                                           DirEnd; +             Dir != DirEnd && !EC; Dir.increment(EC)) { +          if (llvm::sys::path::extension(Dir->path()) != ".framework") +            continue; + +          auto FrameworkDir = +              FileMgr.getDirectory(Dir->path()); +          if (!FrameworkDir) +            continue; + +          // Load this framework module. +          loadFrameworkModule(llvm::sys::path::stem(Dir->path()), *FrameworkDir, +                              IsSystem); +        } +        continue; +      } + +      // FIXME: Deal with header maps. +      if (SearchDirs[Idx].isHeaderMap()) +        continue; + +      // Try to load a module map file for the search directory. +      loadModuleMapFile(SearchDirs[Idx].getDir(), IsSystem, +                        /*IsFramework*/ false); + +      // Try to load module map files for immediate subdirectories of this +      // search directory. +      loadSubdirectoryModuleMaps(SearchDirs[Idx]); +    } +  } + +  // Populate the list of modules. 
+  for (ModuleMap::module_iterator M = ModMap.module_begin(), +                               MEnd = ModMap.module_end(); +       M != MEnd; ++M) { +    Modules.push_back(M->getValue()); +  } +} + +void HeaderSearch::loadTopLevelSystemModules() { +  if (!HSOpts->ImplicitModuleMaps) +    return; + +  // Load module maps for each of the header search directories. +  for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) { +    // We only care about normal header directories. +    if (!SearchDirs[Idx].isNormalDir()) { +      continue; +    } + +    // Try to load a module map file for the search directory. +    loadModuleMapFile(SearchDirs[Idx].getDir(), +                      SearchDirs[Idx].isSystemHeaderDirectory(), +                      SearchDirs[Idx].isFramework()); +  } +} + +void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { +  assert(HSOpts->ImplicitModuleMaps && +         "Should not be loading subdirectory module maps"); + +  if (SearchDir.haveSearchedAllModuleMaps()) +    return; + +  std::error_code EC; +  SmallString<128> Dir = SearchDir.getDir()->getName(); +  FileMgr.makeAbsolutePath(Dir); +  SmallString<128> DirNative; +  llvm::sys::path::native(Dir, DirNative); +  llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); +  for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; +       Dir != DirEnd && !EC; Dir.increment(EC)) { +    bool IsFramework = llvm::sys::path::extension(Dir->path()) == ".framework"; +    if (IsFramework == SearchDir.isFramework()) +      loadModuleMapFile(Dir->path(), SearchDir.isSystemHeaderDirectory(), +                        SearchDir.isFramework()); +  } + +  SearchDir.setSearchedAllModuleMaps(true); +} + +std::string HeaderSearch::suggestPathToFileForDiagnostics( +    const FileEntry *File, llvm::StringRef MainFile, bool *IsSystem) { +  // FIXME: We assume that the path name currently cached in the FileEntry is +  // the most appropriate one for this analysis 
(and that it's spelled the
+  // same way as the corresponding header search path).
+  return suggestPathToFileForDiagnostics(File->getName(), /*WorkingDir=*/"",
+                                         MainFile, IsSystem);
+}
+
+std::string HeaderSearch::suggestPathToFileForDiagnostics(
+    llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile,
+    bool *IsSystem) {
+  using namespace llvm::sys;
+
+  unsigned BestPrefixLength = 0;
+  // Checks whether Dir and File share a common prefix; if they do, and it is
+  // the longest prefix we've seen so far, returns true and updates
+  // BestPrefixLength accordingly.
+  auto CheckDir = [&](llvm::StringRef Dir) -> bool {
+    llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
+    if (!WorkingDir.empty() && !path::is_absolute(Dir))
+      fs::make_absolute(WorkingDir, DirPath);
+    path::remove_dots(DirPath, /*remove_dot_dot=*/true);
+    Dir = DirPath;
+    for (auto NI = path::begin(File), NE = path::end(File),
+              DI = path::begin(Dir), DE = path::end(Dir);
+         /*termination condition in loop*/; ++NI, ++DI) {
+      // '.' components in File are ignored.
+      while (NI != NE && *NI == ".")
+        ++NI;
+      if (NI == NE)
+        break;
+
+      // '.' components in Dir are ignored.
+      while (DI != DE && *DI == ".")
+        ++DI;
+      if (DI == DE) {
+        // Dir is a prefix of File, up to '.' components and choice of path
+        // separators.
+        unsigned PrefixLength = NI - path::begin(File);
+        if (PrefixLength > BestPrefixLength) {
+          BestPrefixLength = PrefixLength;
+          return true;
+        }
+        break;
+      }
+
+      // Consider all path separators equal.
+      if (NI->size() == 1 && DI->size() == 1 && +          path::is_separator(NI->front()) && path::is_separator(DI->front())) +        continue; + +      if (*NI != *DI) +        break; +    } +    return false; +  }; + +  for (unsigned I = 0; I != SearchDirs.size(); ++I) { +    // FIXME: Support this search within frameworks and header maps. +    if (!SearchDirs[I].isNormalDir()) +      continue; + +    StringRef Dir = SearchDirs[I].getDir()->getName(); +    if (CheckDir(Dir) && IsSystem) +      *IsSystem = BestPrefixLength ? I >= SystemDirIdx : false; +  } + +  // Try to shorten include path using TUs directory, if we couldn't find any +  // suitable prefix in include search paths. +  if (!BestPrefixLength && CheckDir(path::parent_path(MainFile)) && IsSystem) +    *IsSystem = false; + + +  return path::convert_to_slash(File.drop_front(BestPrefixLength)); +} diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp new file mode 100644 index 000000000000..17f5ab1e035d --- /dev/null +++ b/clang/lib/Lex/Lexer.cpp @@ -0,0 +1,3951 @@ +//===- Lexer.cpp - C Language Family Lexer --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//  This file implements the Lexer and Token interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Lexer.h" +#include "UnicodeCharSets.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MultipleIncludeOpt.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Lex/Token.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/NativeFormatting.h" +#include "llvm/Support/UnicodeCharRanges.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <string> +#include <tuple> +#include <utility> + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Token Class Implementation +//===----------------------------------------------------------------------===// + +/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. +bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { +  if (isAnnotation()) +    return false; +  if (IdentifierInfo *II = getIdentifierInfo()) +    return II->getObjCKeywordID() == objcKey; +  return false; +} + +/// getObjCKeywordID - Return the ObjC keyword kind. 
+tok::ObjCKeywordKind Token::getObjCKeywordID() const { +  if (isAnnotation()) +    return tok::objc_not_keyword; +  IdentifierInfo *specId = getIdentifierInfo(); +  return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; +} + +//===----------------------------------------------------------------------===// +// Lexer Class Implementation +//===----------------------------------------------------------------------===// + +void Lexer::anchor() {} + +void Lexer::InitLexer(const char *BufStart, const char *BufPtr, +                      const char *BufEnd) { +  BufferStart = BufStart; +  BufferPtr = BufPtr; +  BufferEnd = BufEnd; + +  assert(BufEnd[0] == 0 && +         "We assume that the input buffer has a null character at the end" +         " to simplify lexing!"); + +  // Check whether we have a BOM in the beginning of the buffer. If yes - act +  // accordingly. Right now we support only UTF-8 with and without BOM, so, just +  // skip the UTF-8 BOM if it's present. +  if (BufferStart == BufferPtr) { +    // Determine the size of the BOM. +    StringRef Buf(BufferStart, BufferEnd - BufferStart); +    size_t BOMLength = llvm::StringSwitch<size_t>(Buf) +      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM +      .Default(0); + +    // Skip the BOM. +    BufferPtr += BOMLength; +  } + +  Is_PragmaLexer = false; +  CurrentConflictMarkerState = CMK_None; + +  // Start of the file is a start of line. +  IsAtStartOfLine = true; +  IsAtPhysicalStartOfLine = true; + +  HasLeadingSpace = false; +  HasLeadingEmptyMacro = false; + +  // We are not after parsing a #. +  ParsingPreprocessorDirective = false; + +  // We are not after parsing #include. +  ParsingFilename = false; + +  // We are not in raw mode.  Raw mode disables diagnostics and interpretation +  // of tokens (e.g. identifiers, thus disabling macro expansion).  It is used +  // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block +  // or otherwise skipping over tokens. 
+  LexingRawMode = false; + +  // Default to not keeping comments. +  ExtendedTokenMode = 0; +} + +/// Lexer constructor - Create a new lexer object for the specified buffer +/// with the specified preprocessor managing the lexing process.  This lexer +/// assumes that the associated file buffer and Preprocessor objects will +/// outlive it, so it doesn't take ownership of either of them. +Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) +    : PreprocessorLexer(&PP, FID), +      FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)), +      LangOpts(PP.getLangOpts()) { +  InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(), +            InputFile->getBufferEnd()); + +  resetExtendedTokenMode(); +} + +/// Lexer constructor - Create a new raw lexer object.  This object is only +/// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text +/// range will outlive it, so it doesn't take ownership of it. +Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, +             const char *BufStart, const char *BufPtr, const char *BufEnd) +    : FileLoc(fileloc), LangOpts(langOpts) { +  InitLexer(BufStart, BufPtr, BufEnd); + +  // We *are* in raw mode. +  LexingRawMode = true; +} + +/// Lexer constructor - Create a new raw lexer object.  This object is only +/// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the text +/// range will outlive it, so it doesn't take ownership of it. 
Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
             const SourceManager &SM, const LangOptions &langOpts)
    : Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
            FromFile->getBufferStart(), FromFile->getBufferEnd()) {}

/// Re-derive whitespace/comment retention from the preprocessor's current
/// state: traditional-CPP keeps whitespace, otherwise follow the
/// preprocessor's comment-retention setting.
void Lexer::resetExtendedTokenMode() {
  assert(PP && "Cannot reset token mode without a preprocessor");
  if (LangOpts.TraditionalCPP)
    SetKeepWhitespaceMode(true);
  else
    SetCommentRetentionState(PP->getCommentRetentionState());
}

/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
/// _Pragma expansion.  This has a variety of magic semantics that this method
/// sets up.  It returns a new'd Lexer that must be delete'd when done.
///
/// On entrance to this routine, TokStartLoc is a macro location which has a
/// spelling loc that indicates the bytes to be lexed for the token and an
/// expansion location that indicates where all lexed tokens should be
/// "expanded from".
///
/// TODO: It would really be nice to make _Pragma just be a wrapper around a
/// normal lexer that remaps tokens as they fly by.  This would require making
/// Preprocessor::Lex virtual.  Given that, we could just dump in a magic lexer
/// interface that could handle this stuff.  This would pull GetMappedTokenLoc
/// out of the critical path of the lexer!
///
Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
                                 SourceLocation ExpansionLocStart,
                                 SourceLocation ExpansionLocEnd,
                                 unsigned TokLen, Preprocessor &PP) {
  SourceManager &SM = PP.getSourceManager();

  // Create the lexer as if we were going to lex the file normally.
  FileID SpellingFID = SM.getFileID(SpellingLoc);
  const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID);
  Lexer *L = new Lexer(SpellingFID, InputFile, PP);

  // Now that the lexer is created, change the start/end locations so that we
  // just lex the subsection of the file that we want.  This is lexing from a
  // scratch buffer.
  const char *StrData = SM.getCharacterData(SpellingLoc);

  L->BufferPtr = StrData;
  L->BufferEnd = StrData+TokLen;
  assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");

  // Set the SourceLocation with the remapping information.  This ensures that
  // GetMappedTokenLoc will remap the tokens as they are lexed.
  L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID),
                                     ExpansionLocStart,
                                     ExpansionLocEnd, TokLen);

  // Ensure that the lexer thinks it is inside a directive, so that end \n will
  // return an EOD token.
  L->ParsingPreprocessorDirective = true;

  // This lexer really is for _Pragma.
  L->Is_PragmaLexer = true;
  return L;
}

/// Advance the lexer by NumBytes without lexing the skipped text, marking the
/// new position as the start of a line.  \returns true (failure) when NumBytes
/// would move past the end of the buffer; BufferPtr is only advanced on
/// success (the start-of-line flags are set either way).
bool Lexer::skipOver(unsigned NumBytes) {
  IsAtPhysicalStartOfLine = true;
  IsAtStartOfLine = true;
  if ((BufferPtr + NumBytes) > BufferEnd)
    return true;
  BufferPtr += NumBytes;
  return false;
}

/// Escape, in place, every backslash and every Quote character in Str with a
/// leading '\', and rewrite each newline sequence ('\n', '\r', "\r\n", or
/// "\n\r") as the two characters '\\' 'n'.
template <typename T> static void StringifyImpl(T &Str, char Quote) {
  typename T::size_type i = 0, e = Str.size();
  while (i < e) {
    if (Str[i] == '\\' || Str[i] == Quote) {
      Str.insert(Str.begin() + i, '\\');
      i += 2;
      ++e;
    } else if (Str[i] == '\n' || Str[i] == '\r') {
      // Replace the two-character sequences '\r\n' and '\n\r' with '\\'
      // followed by 'n' (no size change).
      if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') &&
          Str[i] != Str[i + 1]) {
        Str[i] = '\\';
        Str[i + 1] = 'n';
      } else {
        // Replace a lone '\n' or '\r' with '\\' followed by 'n' (grows the
        // string by one character).
        Str[i] = '\\';
        Str.insert(Str.begin() + i + 1, 'n');
        ++e;
      }
      i += 2;
    } else
      ++i;
  }
}

/// Return a copy of Str with backslashes and quote characters escaped and
/// newlines rewritten; Quote is '\'' when Charify is set, '"' otherwise.
std::string Lexer::Stringify(StringRef Str, bool Charify) {
  std::string Result = Str;
  char Quote = Charify ? '\'' : '"';
  StringifyImpl(Result, Quote);
  return Result;
}

/// In-place variant of Stringify, always escaping for a double-quoted string.
void Lexer::Stringify(SmallVectorImpl<char> &Str) { StringifyImpl(Str, '"'); }

//===----------------------------------------------------------------------===//
// Token Spelling
//===----------------------------------------------------------------------===//

/// Slow case of getSpelling. Extract the characters comprising the
/// spelling of this token from the provided input buffer.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
                              const LangOptions &LangOpts, char *Spelling) {
  assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");

  size_t Length = 0;
  const char *BufEnd = BufPtr + Tok.getLength();

  if (tok::isStringLiteral(Tok.getKind())) {
    // Munch the encoding-prefix and opening double-quote.
    while (BufPtr < BufEnd) {
      unsigned Size;
      Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
      BufPtr += Size;

      if (Spelling[Length - 1] == '"')
        break;
    }

    // Raw string literals need special handling; trigraph expansion and line
    // splicing do not occur within their d-char-sequence nor within their
    // r-char-sequence.
    if (Length >= 2 &&
        Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
      // Search backwards from the end of the token to find the matching closing
      // quote.
      const char *RawEnd = BufEnd;
      do --RawEnd; while (*RawEnd != '"');
      size_t RawLength = RawEnd - BufPtr + 1;

      // Everything between the quotes is included verbatim in the spelling.
      memcpy(Spelling + Length, BufPtr, RawLength);
      Length += RawLength;
      BufPtr += RawLength;

      // The rest of the token is lexed normally.
    }
  }

  // Copy the remaining (non-raw) characters one logical character at a time,
  // folding escaped newlines and expanding trigraphs as we go.
  while (BufPtr < BufEnd) {
    unsigned Size;
    Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
    BufPtr += Size;
  }

  assert(Length < Tok.getLength() &&
         "NeedsCleaning flag set on token that didn't need cleaning!");
  return Length;
}

/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding.  In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
                             SmallVectorImpl<char> &buffer,
                             const SourceManager &SM,
                             const LangOptions &options,
                             bool *invalid) {
  // Break down the source location.
  std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);

  // Try to load the file buffer.
  bool invalidTemp = false;
  StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
  if (invalidTemp) {
    if (invalid) *invalid = true;
    return {};
  }

  const char *tokenBegin = file.data() + locInfo.second;

  // Lex from the start of the given location.
  Lexer lexer(SM.getLocForStartOfFile(locInfo.first), options,
              file.begin(), tokenBegin, file.end());
  Token token;
  lexer.LexFromRawLexer(token);

  unsigned length = token.getLength();

  // Common case:  no need for cleaning.
  if (!token.needsCleaning())
    return StringRef(tokenBegin, length);

  // Hard case, we need to relex the characters into the string.
  buffer.resize(length);
  buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
  return StringRef(buffer.data(), buffer.size());
}

/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding.  In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
                               const LangOptions &LangOpts, bool *Invalid) {
  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");

  bool CharDataInvalid = false;
  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
                                                    &CharDataInvalid);
  if (Invalid)
    *Invalid = CharDataInvalid;
  if (CharDataInvalid)
    return {};

  // If this token contains nothing interesting, return it directly.
  if (!Tok.needsCleaning())
    return std::string(TokStart, TokStart + Tok.getLength());

  std::string Result;
  Result.resize(Tok.getLength());
  Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
  return Result;
}

/// getSpelling - This method is used to get the spelling of a token into a
/// preallocated buffer, instead of as an std::string.  The caller is required
/// to allocate enough space for the token, which is guaranteed to be at least
/// Tok.getLength() bytes long.  The actual length of the token is returned.
///
/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may *change the input pointer*
/// to point to a constant buffer with the data already in it (avoiding a
/// copy).  The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, +                            const SourceManager &SourceMgr, +                            const LangOptions &LangOpts, bool *Invalid) { +  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + +  const char *TokStart = nullptr; +  // NOTE: this has to be checked *before* testing for an IdentifierInfo. +  if (Tok.is(tok::raw_identifier)) +    TokStart = Tok.getRawIdentifier().data(); +  else if (!Tok.hasUCN()) { +    if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { +      // Just return the string from the identifier table, which is very quick. +      Buffer = II->getNameStart(); +      return II->getLength(); +    } +  } + +  // NOTE: this can be checked even after testing for an IdentifierInfo. +  if (Tok.isLiteral()) +    TokStart = Tok.getLiteralData(); + +  if (!TokStart) { +    // Compute the start of the token in the input lexer buffer. +    bool CharDataInvalid = false; +    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); +    if (Invalid) +      *Invalid = CharDataInvalid; +    if (CharDataInvalid) { +      Buffer = ""; +      return 0; +    } +  } + +  // If this token contains nothing interesting, return it directly. +  if (!Tok.needsCleaning()) { +    Buffer = TokStart; +    return Tok.getLength(); +  } + +  // Otherwise, hard case, relex the characters into the string. +  return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer)); +} + +/// MeasureTokenLength - Relex the token at the specified location and return +/// its length in bytes in the input file.  If the token needs cleaning (e.g. +/// includes a trigraph or an escaped newline) then this count includes bytes +/// that are part of that. 
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc, +                                   const SourceManager &SM, +                                   const LangOptions &LangOpts) { +  Token TheTok; +  if (getRawToken(Loc, TheTok, SM, LangOpts)) +    return 0; +  return TheTok.getLength(); +} + +/// Relex the token at the specified location. +/// \returns true if there was a failure, false on success. +bool Lexer::getRawToken(SourceLocation Loc, Token &Result, +                        const SourceManager &SM, +                        const LangOptions &LangOpts, +                        bool IgnoreWhiteSpace) { +  // TODO: this could be special cased for common tokens like identifiers, ')', +  // etc to make this faster, if it mattered.  Just look at StrData[0] to handle +  // all obviously single-char tokens.  This could use +  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or +  // something. + +  // If this comes from a macro expansion, we really do want the macro name, not +  // the token this macro expanded to. +  Loc = SM.getExpansionLoc(Loc); +  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); +  bool Invalid = false; +  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); +  if (Invalid) +    return true; + +  const char *StrData = Buffer.data()+LocInfo.second; + +  if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) +    return true; + +  // Create a lexer starting at the beginning of this token. +  Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, +                 Buffer.begin(), StrData, Buffer.end()); +  TheLexer.SetCommentRetentionState(true); +  TheLexer.LexFromRawLexer(Result); +  return false; +} + +/// Returns the pointer that points to the beginning of line that contains +/// the given offset, or null if the offset if invalid. 
+static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) { +  const char *BufStart = Buffer.data(); +  if (Offset >= Buffer.size()) +    return nullptr; + +  const char *LexStart = BufStart + Offset; +  for (; LexStart != BufStart; --LexStart) { +    if (isVerticalWhitespace(LexStart[0]) && +        !Lexer::isNewLineEscaped(BufStart, LexStart)) { +      // LexStart should point at first character of logical line. +      ++LexStart; +      break; +    } +  } +  return LexStart; +} + +static SourceLocation getBeginningOfFileToken(SourceLocation Loc, +                                              const SourceManager &SM, +                                              const LangOptions &LangOpts) { +  assert(Loc.isFileID()); +  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); +  if (LocInfo.first.isInvalid()) +    return Loc; + +  bool Invalid = false; +  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); +  if (Invalid) +    return Loc; + +  // Back up from the current location until we hit the beginning of a line +  // (or the buffer). We'll relex from that point. +  const char *StrData = Buffer.data() + LocInfo.second; +  const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); +  if (!LexStart || LexStart == StrData) +    return Loc; + +  // Create a lexer starting at the beginning of this token. +  SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); +  Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, +                 Buffer.end()); +  TheLexer.SetCommentRetentionState(true); + +  // Lex tokens until we find the token that contains the source location. +  Token TheTok; +  do { +    TheLexer.LexFromRawLexer(TheTok); + +    if (TheLexer.getBufferLocation() > StrData) { +      // Lexing this token has taken the lexer past the source location we're +      // looking for. If the current token encompasses our source location, +      // return the beginning of that token. 
      if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
        return TheTok.getLocation();

      // We ended up skipping over the source location entirely, which means
      // that it points into whitespace. We're done here.
      break;
    }
  } while (TheTok.getKind() != tok::eof);

  // We've passed our source location; just return the original source location.
  return Loc;
}

/// Returns the location of the first character of the token containing
/// \p Loc.
///
/// File locations are resolved directly via getBeginningOfFileToken. For a
/// macro-argument expansion, the token start is found at the spelling
/// location and \p Loc is rewound by the same distance so the result stays
/// inside the expansion; any other macro location is returned unchanged.
SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
  if (Loc.isFileID())
    return getBeginningOfFileToken(Loc, SM, LangOpts);

  if (!SM.isMacroArgExpansion(Loc))
    return Loc;

  SourceLocation FileLoc = SM.getSpellingLoc(Loc);
  SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
  std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
  std::pair<FileID, unsigned> BeginFileLocInfo =
      SM.getDecomposedLoc(BeginFileLoc);
  assert(FileLocInfo.first == BeginFileLocInfo.first &&
         FileLocInfo.second >= BeginFileLocInfo.second);
  // Rewind Loc by the distance from the token's start to the spelled
  // location (the offset computed here is <= 0).
  return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
}

namespace {

/// Classification of a raw preprocessor directive while scanning a preamble.
enum PreambleDirectiveKind {
  PDK_Skipped,  // Directive is allowed in a preamble; keep scanning.
  PDK_Unknown   // Unrecognized directive; the preamble ends at its '#'.
};

} // namespace

PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
                                      const LangOptions &LangOpts,
                                      unsigned MaxLines) {
  // Create a lexer starting at the beginning of the file. Note that we use a
  // "fake" file source location at offset 1 so that the lexer will track our
  // position within the file.
  const unsigned StartOffset = 1;
  SourceLocation FileLoc = SourceLocation::getFromRawEncoding(StartOffset);
  Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
                 Buffer.end());
  TheLexer.SetCommentRetentionState(true);

  bool InPreprocessorDirective = false;
  Token TheTok;
  SourceLocation ActiveCommentLoc;

  // Translate the MaxLines line limit into a byte offset into the buffer so
  // the scan loop below can compare raw token offsets against it. A value of
  // zero means "no limit" (either none requested, or the buffer has fewer
  // lines than the limit).
  unsigned MaxLineOffset = 0;
  if (MaxLines) {
    const char *CurPtr = Buffer.begin();
    unsigned CurLine = 0;
    while (CurPtr != Buffer.end()) {
      char ch = *CurPtr++;
      if (ch == '\n') {
        ++CurLine;
        if (CurLine == MaxLines)
          break;
      }
    }
    if (CurPtr != Buffer.end())
      MaxLineOffset = CurPtr - Buffer.begin();
  }

  do {
    TheLexer.LexFromRawLexer(TheTok);

    if (InPreprocessorDirective) {
      // If we've hit the end of the file, we're done.
      if (TheTok.getKind() == tok::eof) {
        break;
      }

      // If we haven't hit the end of the preprocessor directive, skip this
      // token.
      if (!TheTok.isAtStartOfLine())
        continue;

      // We've passed the end of the preprocessor directive, and will look
      // at this token again below.
      InPreprocessorDirective = false;
    }

    // Keep track of the # of lines in the preamble.
    if (TheTok.isAtStartOfLine()) {
      unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset;

      // If we were asked to limit the number of lines in the preamble,
      // and we're about to exceed that limit, we're done.
      if (MaxLineOffset && TokOffset >= MaxLineOffset)
        break;
    }

    // Comments are okay; skip over them.
    if (TheTok.getKind() == tok::comment) {
      if (ActiveCommentLoc.isInvalid())
        ActiveCommentLoc = TheTok.getLocation();
      continue;
    }

    if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
      // This is the start of a preprocessor directive.
      Token HashTok = TheTok;
      InPreprocessorDirective = true;
      ActiveCommentLoc = SourceLocation();

      // Figure out which directive this is. Since we're lexing raw tokens,
      // we don't have an identifier table available. Instead, just look at
      // the raw identifier to recognize and categorize preprocessor directives.
      TheLexer.LexFromRawLexer(TheTok);
      if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
        StringRef Keyword = TheTok.getRawIdentifier();
        PreambleDirectiveKind PDK
          = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
              .Case("include", PDK_Skipped)
              .Case("__include_macros", PDK_Skipped)
              .Case("define", PDK_Skipped)
              .Case("undef", PDK_Skipped)
              .Case("line", PDK_Skipped)
              .Case("error", PDK_Skipped)
              .Case("pragma", PDK_Skipped)
              .Case("import", PDK_Skipped)
              .Case("include_next", PDK_Skipped)
              .Case("warning", PDK_Skipped)
              .Case("ident", PDK_Skipped)
              .Case("sccs", PDK_Skipped)
              .Case("assert", PDK_Skipped)
              .Case("unassert", PDK_Skipped)
              .Case("if", PDK_Skipped)
              .Case("ifdef", PDK_Skipped)
              .Case("ifndef", PDK_Skipped)
              .Case("elif", PDK_Skipped)
              .Case("else", PDK_Skipped)
              .Case("endif", PDK_Skipped)
              .Default(PDK_Unknown);

        switch (PDK) {
        case PDK_Skipped:
          continue;

        case PDK_Unknown:
          // We don't know what this directive is; stop at the '#'.
          break;
        }
      }

      // We only end up here if we didn't recognize the preprocessor
      // directive or it was one that can't occur in the preamble at this
      // point. Roll back the current token to the location of the '#'.
      TheTok = HashTok;
    }

    // We hit a token that we don't recognize as being in the
    // "preprocessing only" part of the file, so we're no longer in
    // the preamble.
    break;
  } while (true);

  SourceLocation End;
  if (ActiveCommentLoc.isValid())
    End = ActiveCommentLoc; // don't truncate a decl comment.
  else
    End = TheTok.getLocation();

  return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
                        TheTok.isAtStartOfLine());
}

unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) {
  // Figure out how many physical characters away the specified expansion
  // character is.  This needs to take into consideration newlines and
  // trigraphs.
  bool Invalid = false;
  const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);

  // If they request the first char of the token, we're trivially done.
  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
    return 0;

  unsigned PhysOffset = 0;

  // The usual case is that tokens don't contain anything interesting.  Skip
  // over the uninteresting characters.  If a token only consists of simple
  // chars, this method is extremely fast.
  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
    if (CharNo == 0)
      return PhysOffset;
    ++TokPtr;
    --CharNo;
    ++PhysOffset;
  }

  // If we have a character that may be a trigraph or escaped newline, use a
  // lexer to parse it correctly.
  for (; CharNo; --CharNo) {
    unsigned Size;
    Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
    TokPtr += Size;
    PhysOffset += Size;
  }

  // Final detail: if we end up on an escaped newline, we want to return the
  // location of the actual byte of the token.
  // For example foo\<newline>bar
  // advanced by 3 should return the location of b, not of \\.  One compounding
  // detail of this is that the escape may be made by a trigraph.
  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;

  return PhysOffset;
}

/// Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different that it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to. The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
  if (Loc.isInvalid())
    return {};

  // A macro location is only usable if it is the last token of its
  // expansion and no back-offset was requested; in that case Loc is
  // rewritten to the corresponding file location.
  if (Loc.isMacroID()) {
    if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
      return {}; // Points inside the macro expansion.
  }

  // Back off from the token end by Offset; if the token is not longer than
  // Offset, fall back to the token start rather than underflowing.
  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
  if (Len > Offset)
    Len = Len - Offset;
  else
    return Loc;

  return Loc.getLocWithOffset(Len);
}

/// Returns true if the given MacroID location points at the first
/// token of the macro expansion.
bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts,
                                      SourceLocation *MacroBegin) {
  assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");

  SourceLocation expansionLoc;
  if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
    return false;

  if (expansionLoc.isFileID()) {
    // No other macro expansions, this is the first.
    if (MacroBegin)
      *MacroBegin = expansionLoc;
    return true;
  }

  // The immediate expansion point is itself inside a macro; recurse outward
  // until we reach a file location (or fail partway).
  return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin);
}

/// Returns true if the given MacroID location points at the last
/// token of the macro expansion.
bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
                                    const SourceManager &SM,
                                    const LangOptions &LangOpts,
                                    SourceLocation *MacroEnd) {
  assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");

  // Measure the spelled token so we can test the location just past it.
  SourceLocation spellLoc = SM.getSpellingLoc(loc);
  unsigned tokLen = MeasureTokenLength(spellLoc, SM, LangOpts);
  if (tokLen == 0)
    return false;

  SourceLocation afterLoc = loc.getLocWithOffset(tokLen);
  SourceLocation expansionLoc;
  if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
    return false;

  if (expansionLoc.isFileID()) {
    // No other macro expansions.
    if (MacroEnd)
      *MacroEnd = expansionLoc;
    return true;
  }

  return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd);
}

/// Given a range whose endpoints are already file locations, produce the
/// equivalent character range (converting a token range by measuring its
/// final token), or an invalid range if the endpoints do not both resolve
/// into the same FileID in order.
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts) {
  SourceLocation Begin = Range.getBegin();
  SourceLocation End = Range.getEnd();
  assert(Begin.isFileID() && End.isFileID());
  if (Range.isTokenRange()) {
    End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
    if (End.isInvalid())
      return {};
  }

  // Break down the source locations.
  FileID FID;
  unsigned BeginOffs;
  std::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
  if (FID.isInvalid())
    return {};

  unsigned EndOffs;
  if (!SM.isInFileID(End, FID, &EndOffs) ||
      BeginOffs > EndOffs)
    return {};

  return CharSourceRange::getCharRange(Begin, End);
}

CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
  SourceLocation Begin = Range.getBegin();
  SourceLocation End = Range.getEnd();
  if (Begin.isInvalid() || End.isInvalid())
    return {};

  if (Begin.isFileID() && End.isFileID())
    return makeRangeFromFileLocs(Range, SM, LangOpts);

  // A macro begin location is only usable if it is the first token of a
  // macro expansion; map it to the expansion's file location.
  if (Begin.isMacroID() && End.isFileID()) {
    if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin))
      return {};
    Range.setBegin(Begin);
    return makeRangeFromFileLocs(Range, SM, LangOpts);
  }

  // Symmetrically for a macro end location: a token range needs the last
  // token of the expansion, a char range needs the first.
  if (Begin.isFileID() && End.isMacroID()) {
    if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
                                                          &End)) ||
        (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
                                                           &End)))
      return {};
    Range.setEnd(End);
    return makeRangeFromFileLocs(Range, SM, LangOpts);
  }

  assert(Begin.isMacroID() && End.isMacroID());
  SourceLocation MacroBegin, MacroEnd;
  if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
      ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
                                                        &MacroEnd)) ||
       (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
                                                         &MacroEnd)))) {
    Range.setBegin(MacroBegin);
    Range.setEnd(MacroEnd);
    return makeRangeFromFileLocs(Range, SM, LangOpts);
  }

  // Neither endpoint lines up with a full expansion. As a last resort, if
  // both endpoints are arguments of the same macro invocation, retry with
  // their immediate spelling locations.
  bool Invalid = false;
  const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
                                                        &Invalid);
  if (Invalid)
    return {};

  if (BeginEntry.getExpansion().isMacroArgExpansion()) {
    const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
                                                        &Invalid);
    if (Invalid)
      return {};

    if (EndEntry.getExpansion().isMacroArgExpansion() &&
        BeginEntry.getExpansion().getExpansionLocStart() ==
            EndEntry.getExpansion().getExpansionLocStart()) {
      Range.setBegin(SM.getImmediateSpellingLoc(Begin));
      Range.setEnd(SM.getImmediateSpellingLoc(End));
      return makeFileCharRange(Range, SM, LangOpts);
    }
  }

  return {};
}

StringRef Lexer::getSourceText(CharSourceRange Range,
                               const SourceManager &SM,
                               const LangOptions &LangOpts,
                               bool *Invalid) {
  // First reduce the range to plain file character offsets.
  Range = makeFileCharRange(Range, SM, LangOpts);
  if (Range.isInvalid()) {
    if (Invalid) *Invalid = true;
    return {};
  }

  // Break down the source location.
  std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin());
  if (beginInfo.first.isInvalid()) {
    if (Invalid) *Invalid = true;
    return {};
  }

  unsigned EndOffs;
  if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
      beginInfo.second > EndOffs) {
    if (Invalid) *Invalid = true;
    return {};
  }

  // Try to load the file buffer.
  bool invalidTemp = false;
  StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
  if (invalidTemp) {
    if (Invalid) *Invalid = true;
    return {};
  }

  if (Invalid) *Invalid = false;
  return file.substr(beginInfo.second, EndOffs - beginInfo.second);
}

StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
                                       const SourceManager &SM,
                                       const LangOptions &LangOpts) {
  assert(Loc.isMacroID() && "Only reasonable to call this on macros");

  // Find the location of the immediate macro expansion.
  while (true) {
    FileID FID = SM.getFileID(Loc);
    const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
    const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
    Loc = Expansion.getExpansionLocStart();
    if (!Expansion.isMacroArgExpansion())
      break;

    // For macro arguments we need to check that the argument did not come
    // from an inner macro, e.g: "MAC1( MAC2(foo) )"

    // Loc points to the argument id of the macro definition, move to the
    // macro expansion.
    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
    SourceLocation SpellLoc = Expansion.getSpellingLoc();
    if (SpellLoc.isFileID())
      break; // No inner macro.

    // If spelling location resides in the same FileID as macro expansion
    // location, it means there is no inner macro.
    FileID MacroFID = SM.getFileID(Loc);
    if (SM.isInFileID(SpellLoc, MacroFID))
      break;

    // Argument came from inner macro.
    Loc = SpellLoc;
  }

  // Find the spelling location of the start of the non-argument expansion
  // range. This is where the macro name was spelled in order to begin
  // expanding this macro.
  Loc = SM.getSpellingLoc(Loc);

  // Dig out the buffer where the macro name was spelled and the extents of the
  // name so that we can render it into the expansion note.
  std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
  unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
  StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
  return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}

StringRef Lexer::getImmediateMacroNameForDiagnostics(
    SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
  assert(Loc.isMacroID() && "Only reasonable to call this on macros");
  // Walk past macro argument expansions.
  while (SM.isMacroArgExpansion(Loc))
    Loc = SM.getImmediateExpansionRange(Loc).getBegin();

  // If the macro's spelling has no FileID, then it's actually a token paste
  // or stringization (or similar) and not a macro at all.
  if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))
    return {};

  // Find the spelling location of the start of the non-argument expansion
  // range. This is where the macro name was spelled in order to begin
  // expanding this macro.
  Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).getBegin());

  // Dig out the buffer where the macro name was spelled and the extents of the
  // name so that we can render it into the expansion note.
  std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
  unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
  StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
  return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}

/// Returns true if \p c may appear inside an identifier, honoring the
/// DollarIdents language option for '$'.
bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
  return isIdentifierBody(c, LangOpts.DollarIdents);
}

/// Returns true if the vertical whitespace at \p Str is preceded by a
/// backslash line continuation (possibly with horizontal whitespace between
/// the backslash and the newline). \p BufferStart bounds the backward scan.
bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {
  assert(isVerticalWhitespace(Str[0]));
  if (Str - 1 < BufferStart)
    return false;

  // Treat "\r\n" and "\n\r" as a single two-character newline; step back
  // over the first half before looking for the backslash.
  if ((Str[0] == '\n' && Str[-1] == '\r') ||
      (Str[0] == '\r' && Str[-1] == '\n')) {
    if (Str - 2 < BufferStart)
      return false;
    --Str;
  }
  --Str;

  // Rewind to first non-space character:
  while (Str > BufferStart && isHorizontalWhitespace(*Str))
    --Str;

  return *Str == '\\';
}

/// Returns the leading whitespace (spaces and tabs) of the physical line
/// containing \p Loc, or an empty string if the location is invalid, inside
/// a macro, or its buffer cannot be loaded.
StringRef Lexer::getIndentationForLine(SourceLocation Loc,
                                       const SourceManager &SM) {
  if (Loc.isInvalid() || Loc.isMacroID())
    return {};
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
  if (LocInfo.first.isInvalid())
    return {};
  bool Invalid = false;
  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
  if (Invalid)
    return {};
  const char *Line = findBeginningOfLine(Buffer, LocInfo.second);
  if (!Line)
    return {};
  StringRef Rest = Buffer.substr(Line - Buffer.data());
  size_t NumWhitespaceChars = Rest.find_first_not_of(" \t");
  // A line of pure whitespace has no "first non-whitespace" position; treat
  // it as having no indentation.
  return NumWhitespaceChars == StringRef::npos
             ? ""
             : Rest.take_front(NumWhitespaceChars);
}

//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//

/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
/// lexer buffer was all expanded at a single point, perform the mapping.
/// This is currently only used for _Pragma implementation, so it is the slow
/// path of the hot getSourceLocation method.  Do not allow it to be inlined.
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(
    Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen);
static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                        SourceLocation FileLoc,
                                        unsigned CharNo, unsigned TokLen) {
  assert(FileLoc.isMacroID() && "Must be a macro expansion");

  // Otherwise, we're lexing "mapped tokens".  This is used for things like
  // _Pragma handling.  Combine the expansion location of FileLoc with the
  // spelling location.
  SourceManager &SM = PP.getSourceManager();

  // Create a new SLoc which is expanded from Expansion(FileLoc) but whose
  // characters come from spelling(FileLoc)+Offset.
  SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
  SpellingLoc = SpellingLoc.getLocWithOffset(CharNo);

  // Figure out the expansion loc range, which is the range covered by the
  // original _Pragma(...) sequence.
  CharSourceRange II = SM.getImmediateExpansionRange(FileLoc);

  return SM.createExpansionLoc(SpellingLoc, II.getBegin(), II.getEnd(), TokLen);
}

/// getSourceLocation - Return a source location identifier for the specified
/// offset in the current file.
SourceLocation Lexer::getSourceLocation(const char *Loc,
                                        unsigned TokLen) const {
  assert(Loc >= BufferStart && Loc <= BufferEnd &&
         "Location out of range for this buffer!");

  // In the normal case, we're just lexing from a simple file buffer, return
  // the file id from FileLoc with the offset specified.
  unsigned CharNo = Loc-BufferStart;
  if (FileLoc.isFileID())
    return FileLoc.getLocWithOffset(CharNo);

  // Otherwise, this is the _Pragma lexer case, which pretends that all of the
  // tokens are lexed from where the _Pragma was defined.
  assert(PP && "This doesn't work on raw lexers");
  return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
}

/// Diag - Forwarding function for diagnostics.  This translate a source
/// position in the current buffer into a SourceLocation object for rendering.
DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
  return PP->Diag(getSourceLocation(Loc), DiagID);
}

//===----------------------------------------------------------------------===//
// Trigraph and Escaped Newline Handling Code.
//===----------------------------------------------------------------------===//

/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
static char GetTrigraphCharForLetter(char Letter) {
  switch (Letter) {
  default:   return 0;
  case '=':  return '#';
  case ')':  return ']';
  case '(':  return '[';
  case '!':  return '|';
  case '\'': return '^';
  case '>':  return '}';
  case '/':  return '\\';
  case '<':  return '{';
  case '-':  return '~';
  }
}

/// DecodeTrigraphChar - If the specified character is a legal trigraph when
/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
/// return the result character.  Finally, emit a warning about trigraph use
/// whether trigraphs are enabled or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L) {
  char Res = GetTrigraphCharForLetter(*CP);
  // A null lexer (raw/no-warn caller) suppresses all diagnostics.
  if (!Res || !L) return Res;

  if (!L->getLangOpts().Trigraphs) {
    if (!L->isLexingRawMode())
      L->Diag(CP-2, diag::trigraph_ignored);
    return 0;
  }

  if (!L->isLexingRawMode())
    L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
  return Res;
}

/// getEscapedNewLineSize - Return the size of the specified escaped newline,
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
/// trigraph equivalent on entry to this function.
unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
  unsigned Size = 0;
  // Consume whitespace until the newline itself is found; only then is this
  // an escaped newline.
  while (isWhitespace(Ptr[Size])) {
    ++Size;

    if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
      continue;

    // If this is a \r\n or \n\r, skip the other half.
    if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
        Ptr[Size-1] != Ptr[Size])
      ++Size;

    return Size;
  }

  // Not an escaped newline, must be a \t or something else.
  return 0;
}

/// SkipEscapedNewLines - If P points to an escaped newline (or a series of
/// them), skip over them and return the first non-escaped-newline found,
/// otherwise return P.
const char *Lexer::SkipEscapedNewLines(const char *P) {
  while (true) {
    const char *AfterEscape;
    if (*P == '\\') {
      AfterEscape = P+1;
    } else if (*P == '?') {
      // If not a trigraph for escape, bail out.
      if (P[1] != '?' || P[2] != '/')
        return P;
      // FIXME: Take LangOpts into account; the language might not
      // support trigraphs.
      AfterEscape = P+3;
    } else {
      return P;
    }

    unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
    if (NewLineSize == 0) return P;
    P = AfterEscape+NewLineSize;
  }
}

/// Finds the token that comes right after the token at \p Loc by raw-lexing
/// from the end of that token, or None if the location is inside a macro
/// whose end cannot be resolved or the buffer cannot be loaded.
Optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) {
  if (Loc.isMacroID()) {
    if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
      return None;
  }
  Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);

  // Break down the source location.
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);

  // Try to load the file buffer.
  bool InvalidTemp = false;
  StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
  if (InvalidTemp)
    return None;

  const char *TokenBegin = File.data() + LocInfo.second;

  // Lex from the start of the given location.
  Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
                                      TokenBegin, File.end());
  // Find the token.
  Token Tok;
  lexer.LexFromRawLexer(Tok);
  return Tok;
}

/// Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
/// location is inside a macro, the returned source location will be invalid.
SourceLocation Lexer::findLocationAfterToken(
    SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,
    const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) {
  Optional<Token> Tok = findNextToken(Loc, SM, LangOpts);
  if (!Tok || Tok->isNot(TKind))
    return {};
  SourceLocation TokenLoc = Tok->getLocation();

  // Calculate how much whitespace needs to be skipped if any.
  unsigned NumWhitespaceChars = 0;
  if (SkipTrailingWhitespaceAndNewLine) {
    const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength();
    unsigned char C = *TokenEnd;
    while (isHorizontalWhitespace(C)) {
      C = *(++TokenEnd);
      NumWhitespaceChars++;
    }

    // Skip \r, \n, \r\n, or \n\r
    if (C == '\n' || C == '\r') {
      char PrevC = C;
      C = *(++TokenEnd);
      NumWhitespaceChars++;
      // A two-character newline pair (mixed \r and \n) counts as one more.
      if ((C == '\n' || C == '\r') && C != PrevC)
        NumWhitespaceChars++;
    }
  }

  return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars);
}

/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
/// get its size, and return it.  This is tricky in several cases:
///   1. If currently at the start of a trigraph, we warn about the trigraph,
///      then either return the trigraph (skipping 3 chars) or the '?',
///      depending on whether trigraphs are enabled or not.
///   2. If this is an escaped newline (potentially with whitespace between
///      the backslash and newline), implicitly skip the newline and return
///      the char after it.
///
/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
/// know that we can accumulate into Size, and that we have already incremented
/// Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
                               Token *Tok) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters between the slash and
    // newline.
    if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
      // Remember that this token needs to be cleaned.
      if (Tok) Tok->setFlag(Token::NeedsCleaning);

      // Warn if there was whitespace between the backslash and newline.
      if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
        Diag(Ptr, diag::backslash_newline_space);

      // Found backslash<whitespace><newline>.  Parse the char after it.
      Size += EscapedNewLineSize;
      Ptr  += EscapedNewLineSize;

      // Use slow version to accumulate a correct size field.
      return getCharAndSizeSlow(Ptr, Size, Tok);
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), emit
    // a trigraph warning.  If so, and if trigraphs are enabled, return it.
    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : nullptr)) {
      // Remember that this token needs to be cleaned.
      if (Tok) Tok->setFlag(Token::NeedsCleaning);

      Ptr += 3;
      Size += 3;
      // A trigraph that decodes to '\\' may itself begin an escaped newline.
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}

/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
/// and that we have already incremented Ptr by Size bytes.
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
                                     const LangOptions &LangOpts) {
  // If we have a slash, look for an escaped newline.
  if (Ptr[0] == '\\') {
    ++Size;
    ++Ptr;
Slash:
    // Common case, backslash-char where the char is not whitespace.
    if (!isWhitespace(Ptr[0])) return '\\';

    // See if we have optional whitespace characters followed by a newline.
    if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
      // Found backslash<whitespace><newline>.  Parse the char after it.
      Size += EscapedNewLineSize;
      Ptr  += EscapedNewLineSize;

      // Use slow version to accumulate a correct size field.
      return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
    }

    // Otherwise, this is not an escaped newline, just return the slash.
    return '\\';
  }

  // If this is a trigraph, process it.
  if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
    // If this is actually a legal trigraph (not something like "??x"), return
    // it.
    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
      Ptr += 3;
      Size += 3;
      if (C == '\\') goto Slash;
      return C;
    }
  }

  // If this is neither, return a single character.
  ++Size;
  return *Ptr;
}

//===----------------------------------------------------------------------===//
// Helper methods for lexing.
//===----------------------------------------------------------------------===//

/// Routine that indiscriminately sets the offset into the source file.
void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
  // Clamp to the end of the buffer so callers can't run the lexer past it.
  BufferPtr = BufferStart + Offset;
  if (BufferPtr > BufferEnd)
    BufferPtr = BufferEnd;
  // FIXME: What exactly does the StartOfLine bit mean?  There are two
  // possible meanings for the "start" of the line: the first token on the
  // unexpanded line, or the first token on the expanded line.
+  IsAtStartOfLine = StartOfLine; +  IsAtPhysicalStartOfLine = StartOfLine; +} + +static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { +  if (LangOpts.AsmPreprocessor) { +    return false; +  } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { +    static const llvm::sys::UnicodeCharSet C11AllowedIDChars( +        C11AllowedIDCharRanges); +    return C11AllowedIDChars.contains(C); +  } else if (LangOpts.CPlusPlus) { +    static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( +        CXX03AllowedIDCharRanges); +    return CXX03AllowedIDChars.contains(C); +  } else { +    static const llvm::sys::UnicodeCharSet C99AllowedIDChars( +        C99AllowedIDCharRanges); +    return C99AllowedIDChars.contains(C); +  } +} + +static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { +  assert(isAllowedIDChar(C, LangOpts)); +  if (LangOpts.AsmPreprocessor) { +    return false; +  } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { +    static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( +        C11DisallowedInitialIDCharRanges); +    return !C11DisallowedInitialIDChars.contains(C); +  } else if (LangOpts.CPlusPlus) { +    return true; +  } else { +    static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( +        C99DisallowedInitialIDCharRanges); +    return !C99DisallowedInitialIDChars.contains(C); +  } +} + +static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, +                                            const char *End) { +  return CharSourceRange::getCharRange(L.getSourceLocation(Begin), +                                       L.getSourceLocation(End)); +} + +static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, +                                      CharSourceRange Range, bool IsFirst) { +  // Check C99 compatibility. 
+  if (!Diags.isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) { +    enum { +      CannotAppearInIdentifier = 0, +      CannotStartIdentifier +    }; + +    static const llvm::sys::UnicodeCharSet C99AllowedIDChars( +        C99AllowedIDCharRanges); +    static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( +        C99DisallowedInitialIDCharRanges); +    if (!C99AllowedIDChars.contains(C)) { +      Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) +        << Range +        << CannotAppearInIdentifier; +    } else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) { +      Diags.Report(Range.getBegin(), diag::warn_c99_compat_unicode_id) +        << Range +        << CannotStartIdentifier; +    } +  } + +  // Check C++98 compatibility. +  if (!Diags.isIgnored(diag::warn_cxx98_compat_unicode_id, Range.getBegin())) { +    static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( +        CXX03AllowedIDCharRanges); +    if (!CXX03AllowedIDChars.contains(C)) { +      Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) +        << Range; +    } +  } +} + +/// After encountering UTF-8 character C and interpreting it as an identifier +/// character, check whether it's a homoglyph for a common non-identifier +/// source character that is unlikely to be an intentional identifier +/// character and warn if so. +static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, +                                       CharSourceRange Range) { +  // FIXME: Handle Unicode quotation marks (smart quotes, fullwidth quotes). 
  struct HomoglyphPair {
    uint32_t Character;
    char LooksLike;
    bool operator<(HomoglyphPair R) const { return Character < R.Character; }
  };
  // Table is sorted by code point so it can be binary-searched with
  // std::lower_bound below. LooksLike == 0 marks zero-width/invisible
  // characters that do not resemble any printable ASCII character.
  static constexpr HomoglyphPair SortedHomoglyphs[] = {
    {U'\u00ad', 0},   // SOFT HYPHEN
    {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
    {U'\u037e', ';'}, // GREEK QUESTION MARK
    {U'\u200b', 0},   // ZERO WIDTH SPACE
    {U'\u200c', 0},   // ZERO WIDTH NON-JOINER
    {U'\u200d', 0},   // ZERO WIDTH JOINER
    {U'\u2060', 0},   // WORD JOINER
    {U'\u2061', 0},   // FUNCTION APPLICATION
    {U'\u2062', 0},   // INVISIBLE TIMES
    {U'\u2063', 0},   // INVISIBLE SEPARATOR
    {U'\u2064', 0},   // INVISIBLE PLUS
    {U'\u2212', '-'}, // MINUS SIGN
    {U'\u2215', '/'}, // DIVISION SLASH
    {U'\u2216', '\\'}, // SET MINUS
    {U'\u2217', '*'}, // ASTERISK OPERATOR
    {U'\u2223', '|'}, // DIVIDES
    {U'\u2227', '^'}, // LOGICAL AND
    {U'\u2236', ':'}, // RATIO
    {U'\u223c', '~'}, // TILDE OPERATOR
    {U'\ua789', ':'}, // MODIFIER LETTER COLON
    {U'\ufeff', 0},   // ZERO WIDTH NO-BREAK SPACE
    {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
    {U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
    {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
    {U'\uff05', '%'}, // FULLWIDTH PERCENT SIGN
    {U'\uff06', '&'}, // FULLWIDTH AMPERSAND
    {U'\uff08', '('}, // FULLWIDTH LEFT PARENTHESIS
    {U'\uff09', ')'}, // FULLWIDTH RIGHT PARENTHESIS
    {U'\uff0a', '*'}, // FULLWIDTH ASTERISK
    {U'\uff0b', '+'}, // FULLWIDTH PLUS SIGN
    {U'\uff0c', ','}, // FULLWIDTH COMMA
    {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS
    {U'\uff0e', '.'}, // FULLWIDTH FULL STOP
    {U'\uff0f', '/'}, // FULLWIDTH SOLIDUS
    {U'\uff1a', ':'}, // FULLWIDTH COLON
    {U'\uff1b', ';'}, // FULLWIDTH SEMICOLON
    {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN
    {U'\uff1d', '='}, // FULLWIDTH EQUALS SIGN
    {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN
    {U'\uff1f', '?'}, // FULLWIDTH QUESTION MARK
    {U'\uff20', '@'}, // FULLWIDTH COMMERCIAL AT
    {U'\uff3b', '['}, // FULLWIDTH LEFT SQUARE BRACKET
    {U'\uff3c', '\\'}, // FULLWIDTH REVERSE SOLIDUS
    {U'\uff3d', ']'}, // FULLWIDTH RIGHT SQUARE BRACKET
    {U'\uff3e', '^'}, // FULLWIDTH CIRCUMFLEX ACCENT
    {U'\uff5b', '{'}, // FULLWIDTH LEFT CURLY BRACKET
    {U'\uff5c', '|'}, // FULLWIDTH VERTICAL LINE
    {U'\uff5d', '}'}, // FULLWIDTH RIGHT CURLY BRACKET
    {U'\uff5e', '~'}, // FULLWIDTH TILDE
    {0, 0}
  };
  // Exclude the {0, 0} sentinel from the search range.
  auto Homoglyph =
      std::lower_bound(std::begin(SortedHomoglyphs),
                       std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});
  if (Homoglyph->Character == C) {
    llvm::SmallString<5> CharBuf;
    {
      llvm::raw_svector_ostream CharOS(CharBuf);
      llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
    }
    if (Homoglyph->LooksLike) {
      const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
      Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
          << Range << CharBuf << LooksLikeStr;
    } else {
      Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
          << Range << CharBuf;
    }
  }
}

/// Try to consume a universal-character-name (\uXXXX or \UXXXXXXXX) starting
/// at CurPtr + Size as an identifier character. On success, advances CurPtr
/// past the UCN, sets Token::HasUCN on \p Result, and returns true; on
/// failure leaves CurPtr untouched and returns false.
bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
                                    Token &Result) {
  const char *UCNPtr = CurPtr + Size;
  uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/nullptr);
  if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
    return false;

  if (!isLexingRawMode())
    maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
                              makeCharRange(*this, CurPtr, UCNPtr),
                              /*IsFirst=*/false);

  Result.setFlag(Token::HasUCN);
  // If the UCN was written plainly (exactly \uXXXX or \UXXXXXXXX with no
  // trigraphs/escaped newlines in between), we can jump straight past it;
  // otherwise re-lex it character by character so escapes are accounted for.
  if ((UCNPtr - CurPtr ==  6 && CurPtr[1] == 'u') ||
      (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
    CurPtr = UCNPtr;
  else
    while (CurPtr != UCNPtr)
      (void)getAndAdvanceChar(CurPtr,
Result);
  return true;
}

/// Try to consume a raw UTF-8-encoded code point at CurPtr as an identifier
/// character. On success advances CurPtr past the sequence and returns true;
/// on invalid UTF-8 or a disallowed identifier character, returns false with
/// CurPtr unchanged.
bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
  const char *UnicodePtr = CurPtr;
  llvm::UTF32 CodePoint;
  llvm::ConversionResult Result =
      llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
                                (const llvm::UTF8 *)BufferEnd,
                                &CodePoint,
                                llvm::strictConversion);
  if (Result != llvm::conversionOK ||
      !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
    return false;

  if (!isLexingRawMode()) {
    maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
                              makeCharRange(*this, CurPtr, UnicodePtr),
                              /*IsFirst=*/false);
    maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint,
                               makeCharRange(*this, CurPtr, UnicodePtr));
  }

  CurPtr = UnicodePtr;
  return true;
}

/// Lex the remainder of an identifier; the first character has already been
/// consumed and BufferPtr points at the token start. Forms a raw_identifier
/// token and, outside raw mode, resolves it through the identifier table.
bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
  unsigned Size;
  unsigned char C = *CurPtr++;
  while (isIdentifierBody(C))
    C = *CurPtr++;

  --CurPtr;   // Back up over the skipped character.

  // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
  //
  // TODO: Could merge these checks into an InfoTable flag to make the
  // comparison cheaper
  if (isASCII(C) && C != '\\' && C != '?' &&
      (C != '$' || !LangOpts.DollarIdents)) {
FinishIdentifier:
    const char *IdStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
    Result.setRawIdentifierData(IdStart);

    // If we are in raw mode, return this identifier raw.  There is no need to
    // look up identifier information or attempt to macro expand it.
    if (LexingRawMode)
      return true;

    // Fill in Result.IdentifierInfo and update the token kind,
    // looking up the identifier in the identifier table.
    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
    // Note that we have to call PP->LookUpIdentifierInfo() even for code
    // completion, it writes IdentifierInfo into Result, and callers rely on it.

    // If the completion point is at the end of an identifier, we want to treat
    // the identifier as incomplete even if it resolves to a macro or a keyword.
    // This allows e.g. 'class^' to complete to 'classifier'.
    if (isCodeCompletionPoint(CurPtr)) {
      // Return the code-completion token.
      Result.setKind(tok::code_completion);
      // Skip the code-completion char and all immediate identifier characters.
      // This ensures we get consistent behavior when completing at any point in
      // an identifier (i.e. at the start, in the middle, at the end). Note that
      // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code
      // simpler.
      assert(*CurPtr == 0 && "Completion character must be 0");
      ++CurPtr;
      // Note that code completion token is not added as a separate character
      // when the completion point is at the end of the buffer. Therefore, we need
      // to check if the buffer has ended.
      if (CurPtr < BufferEnd) {
        while (isIdentifierBody(*CurPtr))
          ++CurPtr;
      }
      BufferPtr = CurPtr;
      return true;
    }

    // Finally, now that we know we have an identifier, pass this off to the
    // preprocessor, which may macro expand it or something.
    if (II->isHandleIdentifierCase())
      return PP->HandleIdentifier(Result);

    return true;
  }

  // Otherwise, $,\,? in identifier found.  Enter slower path.

  C = getCharAndSize(CurPtr, Size);
  while (true) {
    if (C == '$') {
      // If we hit a $ and they are not supported in identifiers, we are done.
      if (!LangOpts.DollarIdents) goto FinishIdentifier;

      // Otherwise, emit a diagnostic and continue.
      if (!isLexingRawMode())
        Diag(CurPtr, diag::ext_dollar_in_identifier);
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
      continue;
    } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
      C = getCharAndSize(CurPtr, Size);
      continue;
    } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
      C = getCharAndSize(CurPtr, Size);
      continue;
    } else if (!isIdentifierBody(C)) {
      goto FinishIdentifier;
    }

    // Otherwise, this character is good, consume it.
    CurPtr = ConsumeChar(CurPtr, Size, Result);

    C = getCharAndSize(CurPtr, Size);
    while (isIdentifierBody(C)) {
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      C = getCharAndSize(CurPtr, Size);
    }
  }
}

/// isHexaLiteral - Return true if Start points to a hex constant.
/// in microsoft mode (where this is supposed to be several different tokens).
bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
  unsigned Size;
  char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
  if (C1 != '0')
    return false;
  char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
  return (C2 == 'x' || C2 == 'X');
}

/// LexNumericConstant - Lex the remainder of a integer or floating point
/// constant. From[-1] is the first character lexed.  Return the end of the
/// constant.
bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
  unsigned Size;
  char C = getCharAndSize(CurPtr, Size);
  char PrevCh = 0;
  // Consume the maximal run of pp-number body characters, tracking the
  // previous character so exponent signs can be recognized below.
  while (isPreprocessingNumberBody(C)) {
    CurPtr = ConsumeChar(CurPtr, Size, Result);
    PrevCh = C;
    C = getCharAndSize(CurPtr, Size);
  }

  // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
    // If we are in Microsoft mode, don't continue if the constant is hex.
    // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
    if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
      return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
  }

  // If we have a hex FP constant, continue.
  if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) {
    // Outside C99 and C++17, we accept hexadecimal floating point numbers as a
    // not-quite-conforming extension. Only do so if this looks like it's
    // actually meant to be a hexfloat, and not if it has a ud-suffix.
    bool IsHexFloat = true;
    if (!LangOpts.C99) {
      if (!isHexaLiteral(BufferPtr, LangOpts))
        IsHexFloat = false;
      else if (!getLangOpts().CPlusPlus17 &&
               std::find(BufferPtr, CurPtr, '_') != CurPtr)
        IsHexFloat = false;
    }
    if (IsHexFloat)
      return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
  }

  // If we have a digit separator, continue.
  if (C == '\'' && getLangOpts().CPlusPlus14) {
    unsigned NextSize;
    char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());
    if (isIdentifierBody(Next)) {
      if (!isLexingRawMode())
        Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      CurPtr = ConsumeChar(CurPtr, NextSize, Result);
      return LexNumericConstant(Result, CurPtr);
    }
  }

  // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue.
  if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
    return LexNumericConstant(Result, CurPtr);
  if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
    return LexNumericConstant(Result, CurPtr);

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
  Result.setLiteralData(TokStart);
  return true;
}

/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
/// in C++11, or warn on a ud-suffix in C++98.
const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
                               bool IsStringLiteral) {
  assert(getLangOpts().CPlusPlus);

  // Maximally munch an identifier.
  unsigned Size;
  char C = getCharAndSize(CurPtr, Size);
  bool Consumed = false;

  if (!isIdentifierHead(C)) {
    if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
      Consumed = true;
    else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
      Consumed = true;
    else
      return CurPtr;
  }

  // Pre-C++11: ud-suffixes don't exist; warn that this will change meaning
  // and treat the suffix as a separate token.
  if (!getLangOpts().CPlusPlus11) {
    if (!isLexingRawMode())
      Diag(CurPtr,
           C == '_' ? diag::warn_cxx11_compat_user_defined_literal
                    : diag::warn_cxx11_compat_reserved_user_defined_literal)
        << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
    return CurPtr;
  }

  // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
  // that does not start with an underscore is ill-formed. As a conforming
  // extension, we treat all such suffixes as if they had whitespace before
  // them. We assume a suffix beginning with a UCN or UTF-8 character is more
  // likely to be a ud-suffix than a macro, however, and accept that.
  if (!Consumed) {
    bool IsUDSuffix = false;
    if (C == '_')
      IsUDSuffix = true;
    else if (IsStringLiteral && getLangOpts().CPlusPlus14) {
      // In C++1y, we need to look ahead a few characters to see if this is a
      // valid suffix for a string literal or a numeric literal (this could be
      // the 'operator""if' defining a numeric literal operator).
      const unsigned MaxStandardSuffixLength = 3;
      char Buffer[MaxStandardSuffixLength] = { C };
      unsigned Consumed = Size;
      unsigned Chars = 1;
      while (true) {
        unsigned NextSize;
        char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize,
                                         getLangOpts());
        if (!isIdentifierBody(Next)) {
          // End of suffix. Check whether this is on the whitelist.
          const StringRef CompleteSuffix(Buffer, Chars);
          IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(),
                                                            CompleteSuffix);
          break;
        }

        if (Chars == MaxStandardSuffixLength)
          // Too long: can't be a standard suffix.
          break;

        Buffer[Chars++] = Next;
        Consumed += NextSize;
      }
    }

    if (!IsUDSuffix) {
      if (!isLexingRawMode())
        Diag(CurPtr, getLangOpts().MSVCCompat
                         ? diag::ext_ms_reserved_user_defined_literal
                         : diag::ext_reserved_user_defined_literal)
          << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
      return CurPtr;
    }

    CurPtr = ConsumeChar(CurPtr, Size, Result);
  }

  // Consume the rest of the suffix identifier (plain characters, UCNs, or
  // raw UTF-8 identifier characters).
  Result.setFlag(Token::HasUDSuffix);
  while (true) {
    C = getCharAndSize(CurPtr, Size);
    if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); }
    else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
    else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
    else break;
  }

  return CurPtr;
}

/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L" or u8" or u" or U".
bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
                             tok::TokenKind Kind) {
  const char *AfterQuote = CurPtr;
  // Does this string contain the \0 character?
  const char *NulCharacter = nullptr;

  if (!isLexingRawMode() &&
      (Kind == tok::utf8_string_literal ||
       Kind == tok::utf16_string_literal ||
       Kind == tok::utf32_string_literal))
    Diag(BufferPtr, getLangOpts().CPlusPlus
           ? diag::warn_cxx98_compat_unicode_literal
           : diag::warn_c99_compat_unicode_literal);

  char C = getAndAdvanceChar(CurPtr, Result);
  while (C != '"') {
    // Skip escaped characters.  Escaped newlines will already be processed by
    // getAndAdvanceChar.
    if (C == '\\')
      C = getAndAdvanceChar(CurPtr, Result);

    if (C == '\n' || C == '\r' ||             // Newline.
        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
      if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
        Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
      return true;
    }

    if (C == 0) {
      if (isCodeCompletionPoint(CurPtr-1)) {
        if (ParsingFilename)
          codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false);
        else
          PP->CodeCompleteNaturalLanguage();
        FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
        cutOffLexing();
        return true;
      }

      NulCharacter = CurPtr-1;
    }
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // If we are in C++11, lex the optional ud-suffix.
  if (getLangOpts().CPlusPlus)
    CurPtr = LexUDSuffix(Result, CurPtr, true);

  // If a nul character existed in the string, warn about it.
  if (NulCharacter && !isLexingRawMode())
    Diag(NulCharacter, diag::null_in_char_or_string) << 1;

  // Update the location of the token as well as the BufferPtr instance var.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, Kind);
  Result.setLiteralData(TokStart);
  return true;
}

/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
/// having lexed R", LR", u8R", uR", or UR".
bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
                                tok::TokenKind Kind) {
  // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
  //  Between the initial and final double quote characters of the raw string,
  //  any transformations performed in phases 1 and 2 (trigraphs,
  //  universal-character-names, and line splicing) are reverted.

  if (!isLexingRawMode())
    Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);

  unsigned PrefixLen = 0;

  // A raw string delimiter may be at most 16 characters long.
  while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
    ++PrefixLen;

  // If the last character was not a '(', then we didn't lex a valid delimiter.
  if (CurPtr[PrefixLen] != '(') {
    if (!isLexingRawMode()) {
      const char *PrefixEnd = &CurPtr[PrefixLen];
      if (PrefixLen == 16) {
        Diag(PrefixEnd, diag::err_raw_delim_too_long);
      } else {
        Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
          << StringRef(PrefixEnd, 1);
      }
    }

    // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
    // it's possible the '"' was intended to be part of the raw string, but
    // there's not much we can do about that.
    while (true) {
      char C = *CurPtr++;

      if (C == '"')
        break;
      if (C == 0 && CurPtr-1 == BufferEnd) {
        --CurPtr;
        break;
      }
    }

    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return true;
  }

  // Save prefix and move CurPtr past it
  const char *Prefix = CurPtr;
  CurPtr += PrefixLen + 1; // skip over prefix and '('

  while (true) {
    char C = *CurPtr++;

    if (C == ')') {
      // Check for prefix match and closing quote.
      if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
        CurPtr += PrefixLen + 1; // skip over prefix and '"'
        break;
      }
    } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
      if (!isLexingRawMode())
        Diag(BufferPtr, diag::err_unterminated_raw_string)
          << StringRef(Prefix, PrefixLen);
      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
      return true;
    }
  }

  // If we are in C++11, lex the optional ud-suffix.
  if (getLangOpts().CPlusPlus)
    CurPtr = LexUDSuffix(Result, CurPtr, true);

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, Kind);
  Result.setLiteralData(TokStart);
  return true;
}

/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
/// after having lexed the '<' character.  This is used for #include filenames.
bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
  // Does this string contain the \0 character?
  const char *NulCharacter = nullptr;
  const char *AfterLessPos = CurPtr;
  char C = getAndAdvanceChar(CurPtr, Result);
  while (C != '>') {
    // Skip escaped characters.  Escaped newlines will already be processed by
    // getAndAdvanceChar.
    if (C == '\\')
      C = getAndAdvanceChar(CurPtr, Result);

    if (C == '\n' || C == '\r' ||                // Newline.
        (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file.
      // If the filename is unterminated, then it must just be a lone <
      // character.  Return this as such.
      FormTokenWithChars(Result, AfterLessPos, tok::less);
      return true;
    }

    if (C == 0) {
      if (isCodeCompletionPoint(CurPtr - 1)) {
        codeCompleteIncludedFile(AfterLessPos, CurPtr - 1, /*IsAngled=*/true);
        cutOffLexing();
        FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
        return true;
      }
      NulCharacter = CurPtr-1;
    }
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // If a nul character existed in the string, warn about it.
  if (NulCharacter && !isLexingRawMode())
    Diag(NulCharacter, diag::null_in_char_or_string) << 1;

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, tok::header_name);
  Result.setLiteralData(TokStart);
  return true;
}

/// Set up code completion for an #include filename that the completion point
/// falls inside of. \p PathStart points at the first character after the
/// opening quote/angle bracket; \p CompletionPoint is the position of the
/// completion marker within the path.
void Lexer::codeCompleteIncludedFile(const char *PathStart,
                                     const char *CompletionPoint,
                                     bool IsAngled) {
  // Completion only applies to the filename, after the last slash.
  StringRef PartialPath(PathStart, CompletionPoint - PathStart);
  // MSVC mode also accepts '\' as a path separator.
  auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ? "/\\" : "/");
  StringRef Dir =
      (Slash == StringRef::npos) ? "" : PartialPath.take_front(Slash);
  const char *StartOfFilename =
      (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
  // Code completion filter range is the filename only, up to completion point.
  PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get(
      StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
  // We should replace the characters up to the closing quote, if any.
  while (CompletionPoint < BufferEnd) {
    char Next = *(CompletionPoint + 1);
    if (Next == 0 || Next == '\r' || Next == '\n')
      break;
    ++CompletionPoint;
    if (Next == (IsAngled ? '>' : '"'))
      break;
  }
  PP->setCodeCompletionTokenRange(
      FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
      FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
  PP->CodeCompleteIncludedFile(Dir, IsAngled);
}

/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L' or u8' or u' or U'.
bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                            tok::TokenKind Kind) {
  // Does this character contain the \0 character?
  const char *NulCharacter = nullptr;

  if (!isLexingRawMode()) {
    if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
      Diag(BufferPtr, getLangOpts().CPlusPlus
                          ? diag::warn_cxx98_compat_unicode_literal
                          : diag::warn_c99_compat_unicode_literal);
    else if (Kind == tok::utf8_char_constant)
      Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
  }

  char C = getAndAdvanceChar(CurPtr, Result);
  if (C == '\'') {
    if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
      Diag(BufferPtr, diag::ext_empty_character);
    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return true;
  }

  while (C != '\'') {
    // Skip escaped characters.
    if (C == '\\')
      C = getAndAdvanceChar(CurPtr, Result);

    if (C == '\n' || C == '\r' ||             // Newline.
        (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
      if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
        Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
      return true;
    }

    if (C == 0) {
      if (isCodeCompletionPoint(CurPtr-1)) {
        PP->CodeCompleteNaturalLanguage();
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
        cutOffLexing();
        return true;
      }

      NulCharacter = CurPtr-1;
    }
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // If we are in C++11, lex the optional ud-suffix.
  if (getLangOpts().CPlusPlus)
    CurPtr = LexUDSuffix(Result, CurPtr, false);

  // If a nul character existed in the character, warn about it.
  if (NulCharacter && !isLexingRawMode())
    Diag(NulCharacter, diag::null_in_char_or_string) << 0;

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, Kind);
  Result.setLiteralData(TokStart);
  return true;
}

/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
/// Update BufferPtr to point to the next non-whitespace character and return.
///
/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
                           bool &TokAtPhysicalStartOfLine) {
  // Whitespace - Skip it, then return the token after the whitespace.
  bool SawNewline = isVerticalWhitespace(CurPtr[-1]);

  unsigned char Char = *CurPtr;

  // Skip consecutive spaces efficiently.
  while (true) {
    // Skip horizontal whitespace very aggressively.
    while (isHorizontalWhitespace(Char))
      Char = *++CurPtr;

    // Otherwise if we have something other than whitespace, we're done.
    if (!isVerticalWhitespace(Char))
      break;

    if (ParsingPreprocessorDirective) {
      // End of preprocessor directive line, let LexTokenInternal handle this.
      BufferPtr = CurPtr;
      return false;
    }

    // OK, but handle newline.
    SawNewline = true;
    Char = *++CurPtr;
  }

  // If the client wants us to return whitespace, return it now.
  if (isKeepWhitespaceMode()) {
    FormTokenWithChars(Result, CurPtr, tok::unknown);
    if (SawNewline) {
      IsAtStartOfLine = true;
      IsAtPhysicalStartOfLine = true;
    }
    // FIXME: The next token will not have LeadingSpace set.
    return true;
  }

  // If this isn't immediately after a newline, there is leading space.
  char PrevChar = CurPtr[-1];
  bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);

  Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  if (SawNewline) {
    Result.setFlag(Token::StartOfLine);
    TokAtPhysicalStartOfLine = true;
  }

  BufferPtr = CurPtr;
  return false;
}

/// We have just read the // characters from input.  Skip until we find the
/// newline character that terminates the comment.  Then update BufferPtr and
/// return.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
                            bool &TokAtPhysicalStartOfLine) {
  // If Line comments aren't explicitly enabled for this language, emit an
  // extension warning.
  if (!LangOpts.LineComment && !isLexingRawMode()) {
    Diag(BufferPtr, diag::ext_line_comment);

    // Mark them enabled so we only emit one warning for this translation
    // unit.
    LangOpts.LineComment = true;
  }

  // Scan over the body of the comment.  The common case, when scanning, is that
  // the comment contains normal ascii characters with nothing interesting in
  // them.  As such, optimize for this case with the inner loop.
  //
  // This loop terminates with CurPtr pointing at the newline (or end of buffer)
  // character that ends the line comment.
  char C;
  while (true) {
    C = *CurPtr;
    // Skip over characters in the fast loop.
    while (C != 0 &&                // Potentially EOF.
           C != '\n' && C != '\r')  // Newline or DOS-style newline.
      C = *++CurPtr;

    const char *NextLine = CurPtr;
    if (C != 0) {
      // We found a newline, see if it's escaped.
      const char *EscapePtr = CurPtr-1;
      bool HasSpace = false;
      while (isHorizontalWhitespace(*EscapePtr)) { // Skip whitespace.
        --EscapePtr;
        HasSpace = true;
      }

      if (*EscapePtr == '\\')
        // Escaped newline.
        CurPtr = EscapePtr;
      else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' &&
               EscapePtr[-2] == '?' && LangOpts.Trigraphs)
        // Trigraph-escaped newline.
        CurPtr = EscapePtr-2;
      else
        break; // This is a newline, we're done.

      // If there was space between the backslash and newline, warn about it.
      if (HasSpace && !isLexingRawMode())
        Diag(EscapePtr, diag::backslash_newline_space);
    }

    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
    // properly decode the character.  Read it in raw mode to avoid emitting
    // diagnostics about things like trigraphs.  If we see an escaped newline,
    // we'll handle it below.
    const char *OldPtr = CurPtr;
    bool OldRawMode = isLexingRawMode();
    LexingRawMode = true;
    C = getAndAdvanceChar(CurPtr, Result);
    LexingRawMode = OldRawMode;

    // If we read only one character, then no special handling is needed.
    // We're done and can skip forward to the newline.
    if (C != 0 && CurPtr == OldPtr+1) {
      CurPtr = NextLine;
      break;
    }

    // If we read multiple characters, and one of those characters was a \r or
    // \n, then we had an escaped newline within the comment.  Emit diagnostic
    // unless the next line is also a // comment.
    if (CurPtr != OldPtr + 1 && C != '/' &&
        (CurPtr == BufferEnd + 1 || CurPtr[0] != '/')) {
      for (; OldPtr != CurPtr; ++OldPtr)
        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
          // Okay, we found a // comment that ends in a newline, if the next
          // line is also a // comment, but has spaces, don't emit a diagnostic.
          if (isWhitespace(C)) {
            const char *ForwardPtr = CurPtr;
            while (isWhitespace(*ForwardPtr))  // Skip whitespace.
              ++ForwardPtr;
            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
              break;
          }

          if (!isLexingRawMode())
            Diag(OldPtr-1, diag::ext_multi_line_line_comment);
          break;
        }
    }

    if (C == '\r' || C == '\n' || CurPtr == BufferEnd + 1) {
      --CurPtr;
      break;
    }

    if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
      PP->CodeCompleteNaturalLanguage();
      cutOffLexing();
      return false;
    }
  }

  // Found but did not consume the newline.  Notify comment handlers about the
  // comment unless we're in a #if 0 block.
  if (PP && !isLexingRawMode() &&
      PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
                                            getSourceLocation(CurPtr)))) {
    BufferPtr = CurPtr;
    return true; // A token has to be returned.
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode())
    return SaveLineComment(Result, CurPtr);

  // If we are inside a preprocessor directive and we see the end of line,
  // return immediately, so that the lexer can return this as an EOD token.
  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    BufferPtr = CurPtr;
    return false;
  }

  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
  // \r\n sequence.  This is an efficiency hack (because we know the \n can't
  // contribute to another token), it isn't needed for correctness.  Note that
  // this is ok even in KeepWhitespaceMode, because we would have returned the
  // comment above in that mode.
  ++CurPtr;

  // The next returned token is at the start of the line.
  Result.setFlag(Token::StartOfLine);
  TokAtPhysicalStartOfLine = true;
  // No leading whitespace seen so far.
  Result.clearFlag(Token::LeadingSpace);
  BufferPtr = CurPtr;
  return false;
}

/// If in save-comment mode, package up this Line comment in an appropriate
/// way and return it.
///
/// \param Result the token that will carry the comment.
/// \param CurPtr points one past the end of the comment text.
/// \returns true always: a comment token is produced in every path here.
bool Lexer::SaveLineComment(Token &Result, const char *CurPtr) {
  // If we're not in a preprocessor directive, just return the // comment
  // directly.
  FormTokenWithChars(Result, CurPtr, tok::comment);

  if (!ParsingPreprocessorDirective || LexingRawMode)
    return true;

  // If this Line-style comment is in a macro definition, transmogrify it into
  // a C-style block comment.
  bool Invalid = false;
  std::string Spelling = PP->getSpelling(Result, &Invalid);
  if (Invalid)
    return true;

  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?");
  Spelling[1] = '*';   // Change prefix to "/*".
  Spelling += "*/";    // add suffix.

  Result.setKind(tok::comment);
  // Re-intern the rewritten spelling so the token refers to the block-comment
  // form rather than the original buffer text.
  PP->CreateString(Spelling, Result,
                   Result.getLocation(), Result.getLocation());
  return true;
}

/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
/// character (either \\n or \\r) is part of an escaped newline sequence.  Issue
/// a diagnostic if so.  We know that the newline is inside of a block comment.
///
/// \param CurPtr points at the newline character itself.
/// \param L the lexer, used for diagnostics and language options.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                  Lexer *L) {
  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');

  // Back up off the newline.
  --CurPtr;

  // If this is a two-character newline sequence, skip the other character.
  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
    // \n\n or \r\r -> not escaped newline.
    if (CurPtr[0] == CurPtr[1])
      return false;
    // \n\r or \r\n -> skip the newline.
    --CurPtr;
  }

  // If we have horizontal whitespace, skip over it.  We allow whitespace
  // between the slash and newline.
  // Note: embedded NUL bytes are deliberately skipped like whitespace here.
  bool HasSpace = false;
  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
    --CurPtr;
    HasSpace = true;
  }

  // If we have a slash, we know this is an escaped newline.
  if (*CurPtr == '\\') {
    // Only an escaped comment terminator if a '*' precedes the backslash.
    if (CurPtr[-1] != '*') return false;
  } else {
    // It isn't a slash, is it the ?? / trigraph?
    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
        CurPtr[-3] != '*')
      return false;

    // This is the trigraph ending the comment.  Emit a stern warning!
    CurPtr -= 2;

    // If no trigraphs are enabled, warn that we ignored this trigraph and
    // ignore this * character.
    if (!L->getLangOpts().Trigraphs) {
      if (!L->isLexingRawMode())
        L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
      return false;
    }
    if (!L->isLexingRawMode())
      L->Diag(CurPtr, diag::trigraph_ends_block_comment);
  }

  // Warn about having an escaped newline between the */ characters.
  if (!L->isLexingRawMode())
    L->Diag(CurPtr, diag::escaped_newline_block_comment_end);

  // If there was space between the backslash and newline, warn about it.
  if (HasSpace && !L->isLexingRawMode())
    L->Diag(CurPtr, diag::backslash_newline_space);

  return true;
}

#ifdef __SSE2__
#include <emmintrin.h>
#elif __ALTIVEC__
#include <altivec.h>
#undef bool
#endif

/// We have just read from input the / and * characters that started a comment.
/// Read until we find the * and / characters that terminate the comment.
/// Note that we don't bother decoding trigraphs or escaped newlines in block
/// comments, because they cannot cause the comment to end.  The only thing
/// that can happen is the comment could end with an escaped newline between
/// the terminating * and /.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
                             bool &TokAtPhysicalStartOfLine) {
  // Scan one character past where we should, looking for a '/' character.  Once
  // we find it, check to see if it was preceded by a *.  This common
  // optimization helps people who like to put a lot of * characters in their
  // comments.

  // The first character we get with newlines and trigraphs skipped to handle
  // the degenerate /*/ case below correctly if the * has an escaped newline
  // after it.
  unsigned CharSize;
  unsigned char C = getCharAndSize(CurPtr, CharSize);
  CurPtr += CharSize;
  // A NUL one past BufferEnd means we ran off the end of the buffer: the
  // comment was never terminated.
  if (C == 0 && CurPtr == BufferEnd+1) {
    if (!isLexingRawMode())
      Diag(BufferPtr, diag::err_unterminated_block_comment);
    --CurPtr;

    // KeepWhitespaceMode should return this broken comment as a token.  Since
    // it isn't a well formed comment, just return it as an 'unknown' token.
    if (isKeepWhitespaceMode()) {
      FormTokenWithChars(Result, CurPtr, tok::unknown);
      return true;
    }

    BufferPtr = CurPtr;
    return false;
  }

  // Check to see if the first character after the '/*' is another /.  If so,
  // then this slash does not end the block comment, it is part of it.
  if (C == '/')
    C = *CurPtr++;

  while (true) {
    // Skip over all non-interesting characters until we find end of buffer or a
    // (probably ending) '/' character.
    // Only take the fast block-scan path when at least 24 bytes remain.
    if (CurPtr + 24 < BufferEnd &&
        // If there is a code-completion point avoid the fast scan because it
        // doesn't check for '\0'.
        !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {
      // While not aligned to a 16-byte boundary.
      while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
        C = *CurPtr++;

      if (C == '/') goto FoundSlash;

#ifdef __SSE2__
      // Compare 16 bytes at a time against '/'; the movemask gives one bit per
      // byte, so the first set bit is the first slash in the chunk.
      __m128i Slashes = _mm_set1_epi8('/');
      while (CurPtr+16 <= BufferEnd) {
        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
                                    Slashes));
        if (cmp != 0) {
          // Adjust the pointer to point directly after the first slash. It's
          // not necessary to set C here, it will be overwritten at the end of
          // the outer loop.
          CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
          goto FoundSlash;
        }
        CurPtr += 16;
      }
#elif __ALTIVEC__
      __vector unsigned char Slashes = {
        '/', '/', '/', '/',  '/', '/', '/', '/',
        '/', '/', '/', '/',  '/', '/', '/', '/'
      };
      while (CurPtr+16 <= BufferEnd &&
             !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes))
        CurPtr += 16;
#else
      // Scan for '/' quickly.  Many block comments are very large.
      while (CurPtr[0] != '/' &&
             CurPtr[1] != '/' &&
             CurPtr[2] != '/' &&
             CurPtr[3] != '/' &&
             CurPtr+4 < BufferEnd) {
        CurPtr += 4;
      }
#endif

      // It has to be one of the bytes scanned, increment to it and read one.
      C = *CurPtr++;
    }

    // Loop to scan the remainder.
    while (C != '/' && C != '\0')
      C = *CurPtr++;

    if (C == '/') {
  FoundSlash:
      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
        break;

      if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
        if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
          // We found the final */, though it had an escaped newline between the
          // * and /.  We're done!
          break;
        }
      }
      if (CurPtr[0] == '*' && CurPtr[1] != '/') {
        // If this is a /* inside of the comment, emit a warning.  Don't do this
        // if this is a /*/, which will end the comment.  This misses cases with
        // embedded escaped newlines, but oh well.
        if (!isLexingRawMode())
          Diag(CurPtr-1, diag::warn_nested_block_comment);
      }
    } else if (C == 0 && CurPtr == BufferEnd+1) {
      if (!isLexingRawMode())
        Diag(BufferPtr, diag::err_unterminated_block_comment);
      // Note: the user probably forgot a */.  We could continue immediately
      // after the /*, but this would involve lexing a lot of what really is the
      // comment, which surely would confuse the parser.
      --CurPtr;

      // KeepWhitespaceMode should return this broken comment as a token.  Since
      // it isn't a well formed comment, just return it as an 'unknown' token.
      if (isKeepWhitespaceMode()) {
        FormTokenWithChars(Result, CurPtr, tok::unknown);
        return true;
      }

      BufferPtr = CurPtr;
      return false;
    } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) {
      PP->CodeCompleteNaturalLanguage();
      cutOffLexing();
      return false;
    }

    C = *CurPtr++;
  }

  // Notify comment handlers about the comment unless we're in a #if 0 block.
  if (PP && !isLexingRawMode() &&
      PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
                                            getSourceLocation(CurPtr)))) {
    BufferPtr = CurPtr;
    return true; // A token has to be returned.
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode()) {
    FormTokenWithChars(Result, CurPtr, tok::comment);
    return true;
  }

  // It is common for the tokens immediately after a /**/ comment to be
  // whitespace.  Instead of going through the big switch, handle it
  // efficiently now.  This is safe even in KeepWhitespaceMode because we would
  // have already returned above with the comment as a token.
  if (isHorizontalWhitespace(*CurPtr)) {
    SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
    return false;
  }

  // Otherwise, just return so that the next character will be lexed as a token.
  BufferPtr = CurPtr;
  Result.setFlag(Token::LeadingSpace);
  return false;
}

//===----------------------------------------------------------------------===//
// Primary Lexing Entry Points
//===----------------------------------------------------------------------===//

/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
/// uninterpreted string.  This switches the lexer out of directive mode.
///
/// \param Result if non-null, receives the raw characters of the line.
void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");
  Token Tmp;

  // CurPtr - Cache BufferPtr in an automatic variable.
  const char *CurPtr = BufferPtr;
  while (true) {
    char Char = getAndAdvanceChar(CurPtr, Tmp);
    switch (Char) {
    default:
      if (Result)
        Result->push_back(Char);
      break;
    case 0:  // Null.
      // Found end of file?
      if (CurPtr-1 != BufferEnd) {
        if (isCodeCompletionPoint(CurPtr-1)) {
          PP->CodeCompleteNaturalLanguage();
          cutOffLexing();
          return;
        }

        // Nope, normal character, continue.
        if (Result)
          Result->push_back(Char);
        break;
      }
      // FALL THROUGH.
      LLVM_FALLTHROUGH;
    case '\r':
    case '\n':
      // Okay, we found the end of the line. First, back up past the \0, \r, \n.
      assert(CurPtr[-1] == Char && "Trigraphs for newline?");
      BufferPtr = CurPtr-1;

      // Next, lex the character, which should handle the EOD transition.
      Lex(Tmp);
      if (Tmp.is(tok::code_completion)) {
        if (PP)
          PP->CodeCompleteNaturalLanguage();
        Lex(Tmp);
      }
      assert(Tmp.is(tok::eod) && "Unexpected token!");

      // Finally, we're done;
      return;
    }
  }
}

/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
/// condition, reporting diagnostics and handling other edge cases as required.
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
  // If we hit the end of the file while parsing a preprocessor directive,
  // end the preprocessor directive first.  The next token returned will
  // then be the end of file.
  if (ParsingPreprocessorDirective) {
    // Done parsing the "line".
    ParsingPreprocessorDirective = false;
    // Update the location of token as well as BufferPtr.
    FormTokenWithChars(Result, CurPtr, tok::eod);

    // Restore comment saving mode, in case it was disabled for directive.
    if (PP)
      resetExtendedTokenMode();
    return true;  // Have a token.
  }

  // If we are in raw mode, return this event as an EOF token.  Let the caller
  // that put us in raw mode handle the event.
  if (isLexingRawMode()) {
    Result.startToken();
    BufferPtr = BufferEnd;
    FormTokenWithChars(Result, BufferEnd, tok::eof);
    return true;
  }

  // Hand the (still-open) conditional stack over to the preprocessor when a
  // preamble is being recorded for this (primary) file.
  if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) {
    PP->setRecordedPreambleConditionalStack(ConditionalStack);
    ConditionalStack.clear();
  }

  // Issue diagnostics for unterminated #if and missing newline.

  // If we are in a #if directive, emit an error.
  while (!ConditionalStack.empty()) {
    // Suppress the diagnostic when code completion is in progress in this
    // file; the conditional may simply not have been typed yet.
    if (PP->getCodeCompletionFileLoc() != FileLoc)
      PP->Diag(ConditionalStack.back().IfLoc,
               diag::err_pp_unterminated_conditional);
    ConditionalStack.pop_back();
  }

  // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
  // a pedwarn.
  if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) {
    DiagnosticsEngine &Diags = PP->getDiagnostics();
    SourceLocation EndLoc = getSourceLocation(BufferEnd);
    unsigned DiagID;

    if (LangOpts.CPlusPlus11) {
      // C++11 [lex.phases] 2.2 p2
      // Prefer the C++98 pedantic compatibility warning over the generic,
      // non-extension, user-requested "missing newline at EOF" warning.
      if (!Diags.isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
        DiagID = diag::warn_cxx98_compat_no_newline_eof;
      } else {
        DiagID = diag::warn_no_newline_eof;
      }
    } else {
      DiagID = diag::ext_no_newline_eof;
    }

    Diag(BufferEnd, DiagID)
      << FixItHint::CreateInsertion(EndLoc, "\n");
  }

  BufferPtr = CurPtr;

  // Finally, let the preprocessor handle this.
  return PP->HandleEndOfFile(Result, isPragmaLexer());
}

/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
/// the specified lexer will return a tok::l_paren token, 0 if it is something
/// else and 2 if there are no more tokens in the buffer controlled by the
/// lexer.
unsigned Lexer::isNextPPTokenLParen() {
  assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");

  // Switch to 'skipping' mode.  This will ensure that we can lex a token
  // without emitting diagnostics, disables macro expansion, and will cause EOF
  // to return an EOF token instead of popping the include stack.
  LexingRawMode = true;

  // Save state that can be changed while lexing so that we can restore it.
+  const char *TmpBufferPtr = BufferPtr; +  bool inPPDirectiveMode = ParsingPreprocessorDirective; +  bool atStartOfLine = IsAtStartOfLine; +  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; +  bool leadingSpace = HasLeadingSpace; + +  Token Tok; +  Lex(Tok); + +  // Restore state that may have changed. +  BufferPtr = TmpBufferPtr; +  ParsingPreprocessorDirective = inPPDirectiveMode; +  HasLeadingSpace = leadingSpace; +  IsAtStartOfLine = atStartOfLine; +  IsAtPhysicalStartOfLine = atPhysicalStartOfLine; + +  // Restore the lexer back to non-skipping mode. +  LexingRawMode = false; + +  if (Tok.is(tok::eof)) +    return 2; +  return Tok.is(tok::l_paren); +} + +/// Find the end of a version control conflict marker. +static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, +                                   ConflictMarkerKind CMK) { +  const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; +  size_t TermLen = CMK == CMK_Perforce ? 5 : 7; +  auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen); +  size_t Pos = RestOfBuffer.find(Terminator); +  while (Pos != StringRef::npos) { +    // Must occur at start of line. +    if (Pos == 0 || +        (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) { +      RestOfBuffer = RestOfBuffer.substr(Pos+TermLen); +      Pos = RestOfBuffer.find(Terminator); +      continue; +    } +    return RestOfBuffer.data()+Pos; +  } +  return nullptr; +} + +/// IsStartOfConflictMarker - If the specified pointer is the start of a version +/// control conflict marker like '<<<<<<<', recognize it as such, emit an error +/// and recover nicely.  This returns true if it is a conflict marker and false +/// if not. +bool Lexer::IsStartOfConflictMarker(const char *CurPtr) { +  // Only a conflict marker if it starts at the beginning of a line. 
  if (CurPtr != BufferStart &&
      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    return false;

  // Check to see if we have <<<<<<< or >>>>.
  if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
      !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
    return false;

  // If we have a situation where we don't care about conflict markers, ignore
  // it.
  if (CurrentConflictMarkerState || isLexingRawMode())
    return false;

  // '<' opens a normal (git/svn style) marker; otherwise ">>>> " is Perforce.
  ConflictMarkerKind Kind = *CurPtr == '<' ? CMK_Normal : CMK_Perforce;

  // Check to see if there is an ending marker somewhere in the buffer at the
  // start of a line to terminate this conflict marker.
  if (FindConflictEnd(CurPtr, BufferEnd, Kind)) {
    // We found a match.  We are really in a conflict marker.
    // Diagnose this, and ignore to the end of line.
    Diag(CurPtr, diag::err_conflict_marker);
    CurrentConflictMarkerState = Kind;

    // Skip ahead to the end of line.  We know this exists because the
    // end-of-conflict marker starts with \r or \n.
    while (*CurPtr != '\r' && *CurPtr != '\n') {
      assert(CurPtr != BufferEnd && "Didn't find end of line");
      ++CurPtr;
    }
    BufferPtr = CurPtr;
    return true;
  }

  // No end of conflict marker found.
  return false;
}

/// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
/// it is '<<<<' and the conflict marker started with a '>>>>' marker, then it
/// is the end of a conflict marker.  Handle it by ignoring up until the end of
/// the line.  This returns true if it is a conflict marker and false if not.
bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
  // Only a conflict marker if it starts at the beginning of a line.
  if (CurPtr != BufferStart &&
      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    return false;

  // If we have a situation where we don't care about conflict markers, ignore
  // it.
  if (!CurrentConflictMarkerState || isLexingRawMode())
    return false;

  // Check to see if we have the marker (4 characters in a row).
  // The first character was already matched by the caller's dispatch; require
  // the next three to be identical to it.
  for (unsigned i = 1; i != 4; ++i)
    if (CurPtr[i] != CurPtr[0])
      return false;

  // If we do have it, search for the end of the conflict marker.  This could
  // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
  // be the end of conflict marker.
  if (const char *End = FindConflictEnd(CurPtr, BufferEnd,
                                        CurrentConflictMarkerState)) {
    CurPtr = End;

    // Skip ahead to the end of line.
    while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
      ++CurPtr;

    BufferPtr = CurPtr;

    // No longer in the conflict marker.
    CurrentConflictMarkerState = CMK_None;
    return true;
  }

  return false;
}

static const char *findPlaceholderEnd(const char *CurPtr,
                                      const char *BufferEnd) {
  // An empty range cannot contain the "#>" terminator.
  if (CurPtr == BufferEnd)
    return nullptr;
  BufferEnd -= 1; // Scan until the second last character.
  for (; CurPtr != BufferEnd; ++CurPtr) {
    // Return a pointer one past the '>' of the first "#>" found.
    if (CurPtr[0] == '#' && CurPtr[1] == '>')
      return CurPtr + 2;
  }
  return nullptr;
}

/// Try to lex an editor placeholder of the form <#...#> as a single
/// raw_identifier token.
///
/// \param CurPtr points at the '#' just after the '<' that was consumed.
/// \returns true if a placeholder token was formed, false to lex normally.
bool Lexer::lexEditorPlaceholder(Token &Result, const char *CurPtr) {
  assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!");
  if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode)
    return false;
  const char *End = findPlaceholderEnd(CurPtr + 1, BufferEnd);
  if (!End)
    return false;
  const char *Start = CurPtr - 1;
  // Placeholders are only valid source when explicitly allowed (e.g. by an
  // IDE); otherwise diagnose but still form the token for recovery.
  if (!LangOpts.AllowEditorPlaceholders)
    Diag(Start, diag::err_placeholder_in_source);
  Result.startToken();
  FormTokenWithChars(Result, End, tok::raw_identifier);
  Result.setRawIdentifierData(Start);
  PP->LookUpIdentifierInfo(Result);
  Result.setFlag(Token::IsEditorPlaceholder);
  BufferPtr = End;
  return true;
}

/// Return true if \p CurPtr is exactly the code-completion point registered
/// with the preprocessor.
bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
  if (PP && PP->isCodeCompletionEnabled()) {
    SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
    return Loc == PP->getCodeCompletionLoc();
  }

  return false;
}

/// Try to read a universal character name (\\uXXXX or \\UXXXXXXXX) starting
/// at \p StartPtr (just past the backslash at \p SlashLoc).
///
/// \param StartPtr advanced past the UCN on success (or to the bad digit's
/// position handling when invalid); left for the caller to re-lex otherwise.
/// \param Result if non-null, diagnostics are emitted and the HasUCN flag set.
/// \returns the decoded code point, or 0 if this is not a valid UCN.
uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
                           Token *Result) {
  unsigned CharSize;
  char Kind = getCharAndSize(StartPtr, CharSize);

  unsigned NumHexDigits;
  if (Kind == 'u')
    NumHexDigits = 4;
  else if (Kind == 'U')
    NumHexDigits = 8;
  else
    return 0;

  if (!LangOpts.CPlusPlus && !LangOpts.C99) {
    if (Result && !isLexingRawMode())
      Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
    return 0;
  }

  const char *CurPtr = StartPtr + CharSize;
  const char *KindLoc = &CurPtr[-1];

  uint32_t CodePoint = 0;
  for (unsigned i = 0; i < NumHexDigits; ++i) {
    char C = getCharAndSize(CurPtr, CharSize);

    // hexDigitValue returns -1U for a non-hex-digit character.
    unsigned Value = llvm::hexDigitValue(C);
    if (Value == -1U) {
      if (Result && !isLexingRawMode()) {
        if (i == 0) {
          Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
            << StringRef(KindLoc, 1);
        } else {
          Diag(BufferPtr, diag::warn_ucn_escape_incomplete);

          // If the user wrote \U1234, suggest a fixit to \u.
          if (i == 4 && NumHexDigits == 8) {
            CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
            Diag(KindLoc, diag::note_ucn_four_not_eight)
              << FixItHint::CreateReplacement(URange, "u");
          }
        }
      }

      return 0;
    }

    CodePoint <<= 4;
    CodePoint += Value;

    CurPtr += CharSize;
  }

  if (Result) {
    Result->setFlag(Token::HasUCN);
    // If the UCN's spelling occupied exactly backslash + kind + digits bytes,
    // no escaped newlines/trigraphs were involved and we can jump directly;
    // otherwise re-walk it with getAndAdvanceChar so cleaning state is updated.
    if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
      StartPtr = CurPtr;
    else
      while (StartPtr != CurPtr)
        (void)getAndAdvanceChar(StartPtr, *Result);
  } else {
    StartPtr = CurPtr;
  }

  // Don't apply C family restrictions to UCNs in assembly mode
  if (LangOpts.AsmPreprocessor)
    return CodePoint;

  // C99 6.4.3p2: A universal character name shall not specify a character whose
  //   short identifier is less than 00A0 other than 0024 ($), 0040 (@), or
  //   0060 (`), nor one in the range D800 through DFFF inclusive.)
  // C++11 [lex.charset]p2: If the hexadecimal value for a
  //   universal-character-name corresponds to a surrogate code point (in the
  //   range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally,
  //   if the hexadecimal value for a universal-character-name outside the
  //   c-char-sequence, s-char-sequence, or r-char-sequence of a character or
  //   string literal corresponds to a control character (in either of the
  //   ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the
  //   basic source character set, the program is ill-formed.
  if (CodePoint < 0xA0) {
    if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
      return CodePoint;

    // We don't use isLexingRawMode() here because we need to warn about bad
    // UCNs even when skipping preprocessing tokens in a #if block.
    if (Result && PP) {
      if (CodePoint < 0x20 || CodePoint >= 0x7F)
        Diag(BufferPtr, diag::err_ucn_control_character);
      else {
        char C = static_cast<char>(CodePoint);
        Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
      }
    }

    return 0;
  } else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
    // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
    // We don't use isLexingRawMode() here because we need to diagnose bad
    // UCNs even when skipping preprocessing tokens in a #if block.
    if (Result && PP) {
      if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
        Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
      else
        Diag(BufferPtr, diag::err_ucn_escape_invalid);
    }
    return 0;
  }

  return CodePoint;
}

/// If \p C is a Unicode whitespace character, diagnose it as an extension and
/// mark the token as having leading space.  \returns true if it was handled.
bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
                                   const char *CurPtr) {
  static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
      UnicodeWhitespaceCharRanges);
  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
      UnicodeWhitespaceChars.contains(C)) {
    Diag(BufferPtr, diag::ext_unicode_whitespace)
      << makeCharRange(*this, BufferPtr, CurPtr);

    Result.setFlag(Token::LeadingSpace);
    return true;
  }
  return false;
}

/// Lex a token beginning with the non-ASCII character \p C (spelled either
/// directly or as a UCN), which ends at \p CurPtr.
bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
  if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
    if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
        !PP->isPreprocessedOutput()) {
      maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
                                makeCharRange(*this, BufferPtr, CurPtr),
                                /*IsFirst=*/true);
      maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C,
                                 makeCharRange(*this, BufferPtr, CurPtr));
    }

    MIOpt.ReadToken();
    return LexIdentifier(Result, CurPtr);
  }

  if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
      !PP->isPreprocessedOutput() &&
      !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) {
    // Non-ASCII characters tend to creep into source code unintentionally.
    // Instead of letting the parser complain about the unknown token,
    // just drop the character.
    // Note that we can /only/ do this when the non-ASCII character is actually
    // spelled as Unicode, not written as a UCN. The standard requires that
    // we not throw away any possible preprocessor tokens, but there's a
    // loophole in the mapping of Unicode characters to basic character set
    // characters that allows us to map these particular characters to, say,
    // whitespace.
    Diag(BufferPtr, diag::err_non_ascii)
      << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));

    BufferPtr = CurPtr;
    return false;
  }

  // Otherwise, we have an explicit UCN or a character that's unlikely to show
  // up by accident.
  MIOpt.ReadToken();
  FormTokenWithChars(Result, CurPtr, tok::unknown);
  return true;
}

/// Copy the line-start/leading-space/empty-macro flags of \p Result back into
/// the lexer's pending-token state.
void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
  IsAtStartOfLine = Result.isAtStartOfLine();
  HasLeadingSpace = Result.hasLeadingSpace();
  HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro();
  // Note that this doesn't affect IsAtPhysicalStartOfLine.
}

bool Lexer::Lex(Token &Result) {
  // Start a new token.
  Result.startToken();

  // Set up misc whitespace flags for LexTokenInternal.
+  if (IsAtStartOfLine) { +    Result.setFlag(Token::StartOfLine); +    IsAtStartOfLine = false; +  } + +  if (HasLeadingSpace) { +    Result.setFlag(Token::LeadingSpace); +    HasLeadingSpace = false; +  } + +  if (HasLeadingEmptyMacro) { +    Result.setFlag(Token::LeadingEmptyMacro); +    HasLeadingEmptyMacro = false; +  } + +  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; +  IsAtPhysicalStartOfLine = false; +  bool isRawLex = isLexingRawMode(); +  (void) isRawLex; +  bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); +  // (After the LexTokenInternal call, the lexer might be destroyed.) +  assert((returnedToken || !isRawLex) && "Raw lex must succeed"); +  return returnedToken; +} + +/// LexTokenInternal - This implements a simple C family lexer.  It is an +/// extremely performance critical piece of code.  This assumes that the buffer +/// has a null character at the end of the file.  This returns a preprocessing +/// token, not a normal token, as such, it is an internal interface.  It assumes +/// that the Flags of result have been cleared before calling this. +bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { +LexNextToken: +  // New token, can't need cleaning yet. +  Result.clearFlag(Token::NeedsCleaning); +  Result.setIdentifierInfo(nullptr); + +  // CurPtr - Cache BufferPtr in an automatic variable. +  const char *CurPtr = BufferPtr; + +  // Small amounts of horizontal whitespace is very common between tokens. +  if ((*CurPtr == ' ') || (*CurPtr == '\t')) { +    ++CurPtr; +    while ((*CurPtr == ' ') || (*CurPtr == '\t')) +      ++CurPtr; + +    // If we are keeping whitespace and other tokens, just return what we just +    // skipped.  The next lexer invocation will return the token after the +    // whitespace. +    if (isKeepWhitespaceMode()) { +      FormTokenWithChars(Result, CurPtr, tok::unknown); +      // FIXME: The next token will not have LeadingSpace set. 
+      return true; +    } + +    BufferPtr = CurPtr; +    Result.setFlag(Token::LeadingSpace); +  } + +  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below. + +  // Read a character, advancing over it. +  char Char = getAndAdvanceChar(CurPtr, Result); +  tok::TokenKind Kind; + +  switch (Char) { +  case 0:  // Null. +    // Found end of file? +    if (CurPtr-1 == BufferEnd) +      return LexEndOfFile(Result, CurPtr-1); + +    // Check if we are performing code completion. +    if (isCodeCompletionPoint(CurPtr-1)) { +      // Return the code-completion token. +      Result.startToken(); +      FormTokenWithChars(Result, CurPtr, tok::code_completion); +      return true; +    } + +    if (!isLexingRawMode()) +      Diag(CurPtr-1, diag::null_in_file); +    Result.setFlag(Token::LeadingSpace); +    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +      return true; // KeepWhitespaceMode + +    // We know the lexer hasn't changed, so just try again with this lexer. +    // (We manually eliminate the tail call to avoid recursion.) +    goto LexNextToken; + +  case 26:  // DOS & CP/M EOF: "^Z". +    // If we're in Microsoft extensions mode, treat this as end of file. +    if (LangOpts.MicrosoftExt) { +      if (!isLexingRawMode()) +        Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft); +      return LexEndOfFile(Result, CurPtr-1); +    } + +    // If Microsoft extensions are disabled, this is just random garbage. +    Kind = tok::unknown; +    break; + +  case '\r': +    if (CurPtr[0] == '\n') +      (void)getAndAdvanceChar(CurPtr, Result); +    LLVM_FALLTHROUGH; +  case '\n': +    // If we are inside a preprocessor directive and we see the end of line, +    // we know we are done with the directive, so return an EOD token. +    if (ParsingPreprocessorDirective) { +      // Done parsing the "line". +      ParsingPreprocessorDirective = false; + +      // Restore comment saving mode, in case it was disabled for directive. 
+      if (PP) +        resetExtendedTokenMode(); + +      // Since we consumed a newline, we are back at the start of a line. +      IsAtStartOfLine = true; +      IsAtPhysicalStartOfLine = true; + +      Kind = tok::eod; +      break; +    } + +    // No leading whitespace seen so far. +    Result.clearFlag(Token::LeadingSpace); + +    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +      return true; // KeepWhitespaceMode + +    // We only saw whitespace, so just try again with this lexer. +    // (We manually eliminate the tail call to avoid recursion.) +    goto LexNextToken; +  case ' ': +  case '\t': +  case '\f': +  case '\v': +  SkipHorizontalWhitespace: +    Result.setFlag(Token::LeadingSpace); +    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +      return true; // KeepWhitespaceMode + +  SkipIgnoredUnits: +    CurPtr = BufferPtr; + +    // If the next token is obviously a // or /* */ comment, skip it efficiently +    // too (without going through the big switch stmt). +    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() && +        LangOpts.LineComment && +        (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) { +      if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) +        return true; // There is a token to return. +      goto SkipIgnoredUnits; +    } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) { +      if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine)) +        return true; // There is a token to return. +      goto SkipIgnoredUnits; +    } else if (isHorizontalWhitespace(*CurPtr)) { +      goto SkipHorizontalWhitespace; +    } +    // We only saw whitespace, so just try again with this lexer. +    // (We manually eliminate the tail call to avoid recursion.) +    goto LexNextToken; + +  // C99 6.4.4.1: Integer Constants. +  // C99 6.4.4.2: Floating Constants. 
+  case '0': case '1': case '2': case '3': case '4': +  case '5': case '6': case '7': case '8': case '9': +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); +    return LexNumericConstant(Result, CurPtr); + +  case 'u':   // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); + +    if (LangOpts.CPlusPlus11 || LangOpts.C11) { +      Char = getCharAndSize(CurPtr, SizeTmp); + +      // UTF-16 string literal +      if (Char == '"') +        return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                                tok::utf16_string_literal); + +      // UTF-16 character constant +      if (Char == '\'') +        return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                               tok::utf16_char_constant); + +      // UTF-16 raw string literal +      if (Char == 'R' && LangOpts.CPlusPlus11 && +          getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') +        return LexRawStringLiteral(Result, +                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                           SizeTmp2, Result), +                               tok::utf16_string_literal); + +      if (Char == '8') { +        char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2); + +        // UTF-8 string literal +        if (Char2 == '"') +          return LexStringLiteral(Result, +                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                           SizeTmp2, Result), +                               tok::utf8_string_literal); +        if (Char2 == '\'' && LangOpts.CPlusPlus17) +          return LexCharConstant( +              Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                  SizeTmp2, Result), +              tok::utf8_char_constant); + +        if (Char2 
== 'R' && LangOpts.CPlusPlus11) { +          unsigned SizeTmp3; +          char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); +          // UTF-8 raw string literal +          if (Char3 == '"') { +            return LexRawStringLiteral(Result, +                   ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                           SizeTmp2, Result), +                               SizeTmp3, Result), +                   tok::utf8_string_literal); +          } +        } +      } +    } + +    // treat u like the start of an identifier. +    return LexIdentifier(Result, CurPtr); + +  case 'U':   // Identifier (Uber) or C11/C++11 UTF-32 string literal +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); + +    if (LangOpts.CPlusPlus11 || LangOpts.C11) { +      Char = getCharAndSize(CurPtr, SizeTmp); + +      // UTF-32 string literal +      if (Char == '"') +        return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                                tok::utf32_string_literal); + +      // UTF-32 character constant +      if (Char == '\'') +        return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                               tok::utf32_char_constant); + +      // UTF-32 raw string literal +      if (Char == 'R' && LangOpts.CPlusPlus11 && +          getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') +        return LexRawStringLiteral(Result, +                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                           SizeTmp2, Result), +                               tok::utf32_string_literal); +    } + +    // treat U like the start of an identifier. +    return LexIdentifier(Result, CurPtr); + +  case 'R': // Identifier or C++0x raw string literal +    // Notify MIOpt that we read a non-whitespace/non-comment token. 
+    MIOpt.ReadToken(); + +    if (LangOpts.CPlusPlus11) { +      Char = getCharAndSize(CurPtr, SizeTmp); + +      if (Char == '"') +        return LexRawStringLiteral(Result, +                                   ConsumeChar(CurPtr, SizeTmp, Result), +                                   tok::string_literal); +    } + +    // treat R like the start of an identifier. +    return LexIdentifier(Result, CurPtr); + +  case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz"). +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); +    Char = getCharAndSize(CurPtr, SizeTmp); + +    // Wide string literal. +    if (Char == '"') +      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                              tok::wide_string_literal); + +    // Wide raw string literal. +    if (LangOpts.CPlusPlus11 && Char == 'R' && +        getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') +      return LexRawStringLiteral(Result, +                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                                           SizeTmp2, Result), +                               tok::wide_string_literal); + +    // Wide character constant. +    if (Char == '\'') +      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                             tok::wide_char_constant); +    // FALL THROUGH, treating L like the start of an identifier. +    LLVM_FALLTHROUGH; + +  // C99 6.4.2: Identifiers. 
+  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': +  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N': +  case 'O': case 'P': case 'Q':    /*'R'*/case 'S': case 'T':    /*'U'*/ +  case 'V': case 'W': case 'X': case 'Y': case 'Z': +  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': +  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': +  case 'o': case 'p': case 'q': case 'r': case 's': case 't':    /*'u'*/ +  case 'v': case 'w': case 'x': case 'y': case 'z': +  case '_': +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); +    return LexIdentifier(Result, CurPtr); + +  case '$':   // $ in identifiers. +    if (LangOpts.DollarIdents) { +      if (!isLexingRawMode()) +        Diag(CurPtr-1, diag::ext_dollar_in_identifier); +      // Notify MIOpt that we read a non-whitespace/non-comment token. +      MIOpt.ReadToken(); +      return LexIdentifier(Result, CurPtr); +    } + +    Kind = tok::unknown; +    break; + +  // C99 6.4.4: Character Constants. +  case '\'': +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); +    return LexCharConstant(Result, CurPtr, tok::char_constant); + +  // C99 6.4.5: String Literals. +  case '"': +    // Notify MIOpt that we read a non-whitespace/non-comment token. +    MIOpt.ReadToken(); +    return LexStringLiteral(Result, CurPtr, +                            ParsingFilename ? tok::header_name +                                            : tok::string_literal); + +  // C99 6.4.6: Punctuators. 
+  case '?': +    Kind = tok::question; +    break; +  case '[': +    Kind = tok::l_square; +    break; +  case ']': +    Kind = tok::r_square; +    break; +  case '(': +    Kind = tok::l_paren; +    break; +  case ')': +    Kind = tok::r_paren; +    break; +  case '{': +    Kind = tok::l_brace; +    break; +  case '}': +    Kind = tok::r_brace; +    break; +  case '.': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char >= '0' && Char <= '9') { +      // Notify MIOpt that we read a non-whitespace/non-comment token. +      MIOpt.ReadToken(); + +      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); +    } else if (LangOpts.CPlusPlus && Char == '*') { +      Kind = tok::periodstar; +      CurPtr += SizeTmp; +    } else if (Char == '.' && +               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { +      Kind = tok::ellipsis; +      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                           SizeTmp2, Result); +    } else { +      Kind = tok::period; +    } +    break; +  case '&': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '&') { +      Kind = tok::ampamp; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if (Char == '=') { +      Kind = tok::ampequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::amp; +    } +    break; +  case '*': +    if (getCharAndSize(CurPtr, SizeTmp) == '=') { +      Kind = tok::starequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::star; +    } +    break; +  case '+': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '+') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::plusplus; +    } else if (Char == '=') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::plusequal; +    } else { +      Kind = tok::plus; +    } +    break; +  case '-': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == 
'-') {      // -- +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::minusminus; +    } else if (Char == '>' && LangOpts.CPlusPlus && +               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->* +      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                           SizeTmp2, Result); +      Kind = tok::arrowstar; +    } else if (Char == '>') {   // -> +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::arrow; +    } else if (Char == '=') {   // -= +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::minusequal; +    } else { +      Kind = tok::minus; +    } +    break; +  case '~': +    Kind = tok::tilde; +    break; +  case '!': +    if (getCharAndSize(CurPtr, SizeTmp) == '=') { +      Kind = tok::exclaimequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::exclaim; +    } +    break; +  case '/': +    // 6.4.9: Comments +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '/') {         // Line comment. +      // Even if Line comments are disabled (e.g. in C89 mode), we generally +      // want to lex this as a comment.  There is one problem with this though, +      // that in one particular corner case, this can change the behavior of the +      // resultant program.  For example, In  "foo //**/ bar", C89 would lex +      // this as "foo / bar" and languages with Line comments would lex it as +      // "foo".  Check to see if the character after the second slash is a '*'. +      // If so, we will lex that as a "/" instead of the start of a comment. +      // However, we never do this if we are just preprocessing. 
+      bool TreatAsComment = LangOpts.LineComment && +                            (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP); +      if (!TreatAsComment) +        if (!(PP && PP->isPreprocessedOutput())) +          TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*'; + +      if (TreatAsComment) { +        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                            TokAtPhysicalStartOfLine)) +          return true; // There is a token to return. + +        // It is common for the tokens immediately after a // comment to be +        // whitespace (indentation for the next line).  Instead of going through +        // the big switch, handle it efficiently now. +        goto SkipIgnoredUnits; +      } +    } + +    if (Char == '*') {  // /**/ comment. +      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result), +                           TokAtPhysicalStartOfLine)) +        return true; // There is a token to return. + +      // We only saw whitespace, so just try again with this lexer. +      // (We manually eliminate the tail call to avoid recursion.) 
+      goto LexNextToken; +    } + +    if (Char == '=') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::slashequal; +    } else { +      Kind = tok::slash; +    } +    break; +  case '%': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '=') { +      Kind = tok::percentequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if (LangOpts.Digraphs && Char == '>') { +      Kind = tok::r_brace;                             // '%>' -> '}' +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if (LangOpts.Digraphs && Char == ':') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Char = getCharAndSize(CurPtr, SizeTmp); +      if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { +        Kind = tok::hashhash;                          // '%:%:' -> '##' +        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                             SizeTmp2, Result); +      } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize +        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +        if (!isLexingRawMode()) +          Diag(BufferPtr, diag::ext_charize_microsoft); +        Kind = tok::hashat; +      } else {                                         // '%:' -> '#' +        // We parsed a # character.  If this occurs at the start of the line, +        // it's actually the start of a preprocessing directive.  Callback to +        // the preprocessor to handle it. +        // TODO: -fpreprocessed mode?? 
+        if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) +          goto HandleDirective; + +        Kind = tok::hash; +      } +    } else { +      Kind = tok::percent; +    } +    break; +  case '<': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (ParsingFilename) { +      return LexAngledStringLiteral(Result, CurPtr); +    } else if (Char == '<') { +      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); +      if (After == '=') { +        Kind = tok::lesslessequal; +        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                             SizeTmp2, Result); +      } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { +        // If this is actually a '<<<<<<<' version control conflict marker, +        // recognize it as such and recover nicely. +        goto LexNextToken; +      } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { +        // If this is '<<<<' and we're in a Perforce-style conflict marker, +        // ignore it. +        goto LexNextToken; +      } else if (LangOpts.CUDA && After == '<') { +        Kind = tok::lesslessless; +        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                             SizeTmp2, Result); +      } else { +        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +        Kind = tok::lessless; +      } +    } else if (Char == '=') { +      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); +      if (After == '>') { +        if (getLangOpts().CPlusPlus2a) { +          if (!isLexingRawMode()) +            Diag(BufferPtr, diag::warn_cxx17_compat_spaceship); +          CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                               SizeTmp2, Result); +          Kind = tok::spaceship; +          break; +        } +        // Suggest adding a space between the '<=' and the '>' to avoid a +        // change in semantics if this turns up in C++ <=17 mode. 
+        if (getLangOpts().CPlusPlus && !isLexingRawMode()) { +          Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship) +            << FixItHint::CreateInsertion( +                   getSourceLocation(CurPtr + SizeTmp, SizeTmp2), " "); +        } +      } +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::lessequal; +    } else if (LangOpts.Digraphs && Char == ':') {     // '<:' -> '[' +      if (LangOpts.CPlusPlus11 && +          getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') { +        // C++0x [lex.pptoken]p3: +        //  Otherwise, if the next three characters are <:: and the subsequent +        //  character is neither : nor >, the < is treated as a preprocessor +        //  token by itself and not as the first character of the alternative +        //  token <:. +        unsigned SizeTmp3; +        char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); +        if (After != ':' && After != '>') { +          Kind = tok::less; +          if (!isLexingRawMode()) +            Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon); +          break; +        } +      } + +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::l_square; +    } else if (LangOpts.Digraphs && Char == '%') {     // '<%' -> '{' +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::l_brace; +    } else if (Char == '#' && /*Not a trigraph*/ SizeTmp == 1 && +               lexEditorPlaceholder(Result, CurPtr)) { +      return true; +    } else { +      Kind = tok::less; +    } +    break; +  case '>': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '=') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::greaterequal; +    } else if (Char == '>') { +      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2); +      if (After == '=') { +        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                             SizeTmp2, Result); +        Kind = 
tok::greatergreaterequal; +      } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { +        // If this is actually a '>>>>' conflict marker, recognize it as such +        // and recover nicely. +        goto LexNextToken; +      } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { +        // If this is '>>>>>>>' and we're in a conflict marker, ignore it. +        goto LexNextToken; +      } else if (LangOpts.CUDA && After == '>') { +        Kind = tok::greatergreatergreater; +        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), +                             SizeTmp2, Result); +      } else { +        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +        Kind = tok::greatergreater; +      } +    } else { +      Kind = tok::greater; +    } +    break; +  case '^': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '=') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::caretequal; +    } else if (LangOpts.OpenCL && Char == '^') { +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +      Kind = tok::caretcaret; +    } else { +      Kind = tok::caret; +    } +    break; +  case '|': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '=') { +      Kind = tok::pipeequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if (Char == '|') { +      // If this is '|||||||' and we're in a conflict marker, ignore it. 
+      if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) +        goto LexNextToken; +      Kind = tok::pipepipe; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::pipe; +    } +    break; +  case ':': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (LangOpts.Digraphs && Char == '>') { +      Kind = tok::r_square; // ':>' -> ']' +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if ((LangOpts.CPlusPlus || +                LangOpts.DoubleSquareBracketAttributes) && +               Char == ':') { +      Kind = tok::coloncolon; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::colon; +    } +    break; +  case ';': +    Kind = tok::semi; +    break; +  case '=': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '=') { +      // If this is '====' and we're in a conflict marker, ignore it. +      if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) +        goto LexNextToken; + +      Kind = tok::equalequal; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      Kind = tok::equal; +    } +    break; +  case ',': +    Kind = tok::comma; +    break; +  case '#': +    Char = getCharAndSize(CurPtr, SizeTmp); +    if (Char == '#') { +      Kind = tok::hashhash; +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else if (Char == '@' && LangOpts.MicrosoftExt) {  // #@ -> Charize +      Kind = tok::hashat; +      if (!isLexingRawMode()) +        Diag(BufferPtr, diag::ext_charize_microsoft); +      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); +    } else { +      // We parsed a # character.  If this occurs at the start of the line, +      // it's actually the start of a preprocessing directive.  Callback to +      // the preprocessor to handle it. +      // TODO: -fpreprocessed mode?? 
+      if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) +        goto HandleDirective; + +      Kind = tok::hash; +    } +    break; + +  case '@': +    // Objective C support. +    if (CurPtr[-1] == '@' && LangOpts.ObjC) +      Kind = tok::at; +    else +      Kind = tok::unknown; +    break; + +  // UCNs (C99 6.4.3, C++11 [lex.charset]p2) +  case '\\': +    if (!LangOpts.AsmPreprocessor) { +      if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) { +        if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { +          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +            return true; // KeepWhitespaceMode + +          // We only saw whitespace, so just try again with this lexer. +          // (We manually eliminate the tail call to avoid recursion.) +          goto LexNextToken; +        } + +        return LexUnicode(Result, CodePoint, CurPtr); +      } +    } + +    Kind = tok::unknown; +    break; + +  default: { +    if (isASCII(Char)) { +      Kind = tok::unknown; +      break; +    } + +    llvm::UTF32 CodePoint; + +    // We can't just reset CurPtr to BufferPtr because BufferPtr may point to +    // an escaped newline. +    --CurPtr; +    llvm::ConversionResult Status = +        llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, +                                  (const llvm::UTF8 *)BufferEnd, +                                  &CodePoint, +                                  llvm::strictConversion); +    if (Status == llvm::conversionOK) { +      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { +        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) +          return true; // KeepWhitespaceMode + +        // We only saw whitespace, so just try again with this lexer. +        // (We manually eliminate the tail call to avoid recursion.) 
+        goto LexNextToken; +      } +      return LexUnicode(Result, CodePoint, CurPtr); +    } + +    if (isLexingRawMode() || ParsingPreprocessorDirective || +        PP->isPreprocessedOutput()) { +      ++CurPtr; +      Kind = tok::unknown; +      break; +    } + +    // Non-ASCII characters tend to creep into source code unintentionally. +    // Instead of letting the parser complain about the unknown token, +    // just diagnose the invalid UTF-8, then drop the character. +    Diag(CurPtr, diag::err_invalid_utf8); + +    BufferPtr = CurPtr+1; +    // We're pretending the character didn't exist, so just try again with +    // this lexer. +    // (We manually eliminate the tail call to avoid recursion.) +    goto LexNextToken; +  } +  } + +  // Notify MIOpt that we read a non-whitespace/non-comment token. +  MIOpt.ReadToken(); + +  // Update the location of token as well as BufferPtr. +  FormTokenWithChars(Result, CurPtr, Kind); +  return true; + +HandleDirective: +  // We parsed a # character and it's the start of a preprocessing directive. + +  FormTokenWithChars(Result, CurPtr, tok::hash); +  PP->HandleDirective(Result); + +  if (PP->hadModuleLoaderFatalFailure()) { +    // With a fatal failure in the module loader, we abort parsing. +    assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof"); +    return true; +  } + +  // We parsed the directive; lex a token with the new state. +  return false; +} diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp new file mode 100644 index 000000000000..2108408377fb --- /dev/null +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -0,0 +1,1896 @@ +//===--- LiteralSupport.cpp - Code to parse and process literals ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/LiteralSupport.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

using namespace clang;

/// Return the width, in bits, of one "character unit" of a literal of the
/// given token kind on the given target (char, wchar_t, char16_t, char32_t).
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
  switch (kind) {
  default: llvm_unreachable("Unknown token type!");
  case tok::char_constant:
  case tok::string_literal:
  case tok::utf8_char_constant:
  case tok::utf8_string_literal:
    return Target.getCharWidth();
  case tok::wide_char_constant:
  case tok::wide_string_literal:
    return Target.getWCharWidth();
  case tok::utf16_char_constant:
  case tok::utf16_string_literal:
    return Target.getChar16Width();
  case tok::utf32_char_constant:
  case tok::utf32_string_literal:
    return Target.getChar32Width();
  }
}

/// Build a character source range covering [TokRangeBegin, TokRangeEnd),
/// which must be a substring of the spelling buffer of the token that begins
/// at \p TokBegin / \p TokLoc.
static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
                                           FullSourceLoc TokLoc,
                                           const char *TokBegin,
                                           const char *TokRangeBegin,
                                           const char *TokRangeEnd) {
  SourceLocation Begin =
    Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
                                   TokLoc.getManager(), Features);
  SourceLocation End =
    Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
                                   TokLoc.getManager(), Features);
  return CharSourceRange::getCharRange(Begin, End);
}

/// Produce a diagnostic highlighting some portion of a literal.
///
/// Emits the diagnostic \p DiagID, highlighting the range of characters from
/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
/// a substring of a spelling buffer for the token beginning at \p TokBegin.
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
                              const LangOptions &Features, FullSourceLoc TokLoc,
                              const char *TokBegin, const char *TokRangeBegin,
                              const char *TokRangeEnd, unsigned DiagID) {
  SourceLocation Begin =
    Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
                                   TokLoc.getManager(), Features);
  return Diags->Report(Begin, DiagID) <<
    MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
}

/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
/// either a character or a string literal.
///
/// On entry \p ThisTokBuf points at the '\'; on exit it points just past the
/// escape sequence.  \p HadError is set (never cleared) on a hard error.
/// Hex and octal escapes are truncated to \p CharWidth bits; diagnostics are
/// emitted only when \p Diags is non-null.
static unsigned ProcessCharEscape(const char *ThisTokBegin,
                                  const char *&ThisTokBuf,
                                  const char *ThisTokEnd, bool &HadError,
                                  FullSourceLoc Loc, unsigned CharWidth,
                                  DiagnosticsEngine *Diags,
                                  const LangOptions &Features) {
  const char *EscapeBegin = ThisTokBuf;

  // Skip the '\' char.
  ++ThisTokBuf;

  // We know that this character can't be off the end of the buffer, because
  // that would have been \", which would not have been the end of string.
  unsigned ResultChar = *ThisTokBuf++;
  switch (ResultChar) {
  // These map to themselves.
  case '\\': case '\'': case '"': case '?': break;

    // These have fixed mappings.
  case 'a':
    // TODO: K&R: the meaning of '\\a' is different in traditional C
    ResultChar = 7;   // BEL
    break;
  case 'b':
    ResultChar = 8;   // BS
    break;
  case 'e':
    // '\e' (ESC) is non-standard; diagnose it as an extension but accept it.
    if (Diags)
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::ext_nonstandard_escape) << "e";
    ResultChar = 27;  // ESC
    break;
  case 'E':
    if (Diags)
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::ext_nonstandard_escape) << "E";
    ResultChar = 27;  // ESC
    break;
  case 'f':
    ResultChar = 12;  // FF
    break;
  case 'n':
    ResultChar = 10;  // LF
    break;
  case 'r':
    ResultChar = 13;  // CR
    break;
  case 't':
    ResultChar = 9;   // HT
    break;
  case 'v':
    ResultChar = 11;  // VT
    break;
  case 'x': { // Hex escape.
    ResultChar = 0;
    if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
      if (Diags)
        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
             diag::err_hex_escape_no_digits) << "x";
      HadError = true;
      break;
    }

    // Hex escapes are a maximal series of hex digits.
    bool Overflow = false;
    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
      int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
      if (CharVal == -1) break;
      // About to shift out a digit?  The top nibble must be clear before the
      // next '<<= 4' or the value no longer fits in 32 bits.
      if (ResultChar & 0xF0000000)
        Overflow = true;
      ResultChar <<= 4;
      ResultChar |= CharVal;
    }

    // See if any bits will be truncated when evaluated as a character.
    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
      Overflow = true;
      ResultChar &= ~0U >> (32-CharWidth);  // keep only the low CharWidth bits
    }

    // Check for overflow.
    if (Overflow && Diags)   // Too many digits to fit in
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::err_escape_too_large) << 0;
    break;
  }
  case '0': case '1': case '2': case '3':
  case '4': case '5': case '6': case '7': {
    // Octal escapes.
    --ThisTokBuf;   // Back up so the first digit is re-read by the loop below.
    ResultChar = 0;

    // Octal escapes are a series of octal digits with maximum length 3.
    // "\0123" is a two digit sequence equal to "\012" "3".
    unsigned NumDigits = 0;
    do {
      ResultChar <<= 3;
      ResultChar |= *ThisTokBuf++ - '0';
      ++NumDigits;
    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');

    // Check for overflow.  Reject '\777', but not L'\777'.
    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
      if (Diags)
        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
             diag::err_escape_too_large) << 1;
      ResultChar &= ~0U >> (32-CharWidth);
    }
    break;
  }

    // Otherwise, these are not valid escapes.
  case '(': case '{': case '[': case '%':
    // GCC accepts these as extensions.  We warn about them as such though.
    if (Diags)
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::ext_nonstandard_escape)
        << std::string(1, ResultChar);
    break;
  default:
    if (!Diags)
      break;

    // Unknown escape: warn, printing the character itself when printable and
    // its hex spelling otherwise.
    if (isPrintable(ResultChar))
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::ext_unknown_escape)
        << std::string(1, ResultChar);
    else
      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
           diag::ext_unknown_escape)
        << "x" + llvm::utohexstr(ResultChar);
    break;
  }

  return ResultChar;
}

/// Encode \p Codepoint as UTF-8 and append the bytes to \p Str.  The code
/// point must be encodable (enforced with an assert, not a diagnostic).
static void appendCodePoint(unsigned Codepoint,
                            llvm::SmallVectorImpl<char> &Str) {
  char ResultBuf[4];
  char *ResultPtr = ResultBuf;
  bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
  (void)Res;
  assert(Res && "Unexpected conversion failure");
  Str.append(ResultBuf, ResultPtr);
}

/// Copy \p Input into \p Buf, replacing each \uXXXX / \UXXXXXXXX escape with
/// the UTF-8 encoding of its code point.  The input is assumed to contain
/// only well-formed UCNs (enforced with asserts, not diagnostics).
void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
    if (*I != '\\') {
      Buf.push_back(*I);
      continue;
    }

    ++I;
    assert(*I == 'u' || *I == 'U');

    // \u carries 4 hex digits, \U carries 8.
    unsigned NumHexDigits;
    if (*I == 'u')
      NumHexDigits = 4;
    else
      NumHexDigits = 8;

    assert(I + NumHexDigits <= E);

    uint32_t CodePoint = 0;
    for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
      unsigned Value = llvm::hexDigitValue(*I);
      assert(Value != -1U);

      CodePoint <<= 4;
      CodePoint += Value;
    }

    appendCodePoint(CodePoint, Buf);
    --I;  // Compensate for the outer loop's ++I; leave I on the last digit.
  }
}

/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
/// return the UTF32.
///
/// On entry \p ThisTokBuf points at the '\'; on exit it points just past the
/// escape.  The decoded code point is returned in \p UcnVal and the digit
/// count (4 for \u, 8 for \U) in \p UcnLen.  Returns true on success;
/// diagnostics are emitted only when \p Diags is non-null.
+static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+                             const char *ThisTokEnd,
+                             uint32_t &UcnVal, unsigned short &UcnLen,
+                             FullSourceLoc Loc, DiagnosticsEngine *Diags,
+                             const LangOptions &Features,
+                             bool in_char_string_literal = false) {
+  const char *UcnBegin = ThisTokBuf;
+
+  // Skip the '\u' char's.
+  ThisTokBuf += 2;
+
+  // There must be at least one hex digit following the '\u' / '\U'.
+  if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
+    return false;
+  }
+  // '\u' requires exactly 4 hex digits, '\U' exactly 8.
+  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
+  unsigned short UcnLenSave = UcnLen;
+  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
+    int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
+    if (CharVal == -1) break;
+    UcnVal <<= 4;
+    UcnVal |= CharVal;
+  }
+  // If we didn't consume the proper number of digits, there is a problem.
+  if (UcnLenSave) {
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_ucn_escape_incomplete);
+    return false;
+  }
+
+  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
+  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
+      UcnVal > 0x10FFFF) {                      // maximum legal UTF32 value
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_ucn_escape_invalid);
+    return false;
+  }
+
+  // C++11 allows UCNs that refer to control characters and basic source
+  // characters inside character and string literals
+  if (UcnVal < 0xa0 &&
+      (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) {  // $, @, `
+    // Outside C++11 literals this is an error; inside them it is only a
+    // C++98-compatibility warning.
+    bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
+    if (Diags) {
+      char BasicSCSChar = UcnVal;
+      if (UcnVal >= 0x20 && UcnVal < 0x7f)
+        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+             IsError ? diag::err_ucn_escape_basic_scs :
+                       diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
+            << StringRef(&BasicSCSChar, 1);
+      else
+        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+             IsError ? diag::err_ucn_control_character :
+                       diag::warn_cxx98_compat_literal_ucn_control_character);
+    }
+    if (IsError)
+      return false;
+  }
+
+  // Warning only: the UCN is still accepted in C89 mode.
+  if (!Features.CPlusPlus && !Features.C99 && Diags)
+    Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+         diag::warn_ucn_not_valid_in_c89_literal);
+
+  return true;
+}
+
+/// MeasureUCNEscape - Determine the number of bytes within the resulting string
+/// which this UCN will occupy.
+static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+                            const char *ThisTokEnd, unsigned CharByteWidth,
+                            const LangOptions &Features, bool &HadError) {
+  // A single UTF-32 code unit always holds the whole code point.
+  if (CharByteWidth == 4)
+    return 4;
+
+  uint32_t CodePoint = 0;
+  unsigned short Len = 0;
+  FullSourceLoc Loc;
+
+  // Parse and validate the UCN without emitting diagnostics (null Diags).
+  bool Parsed = ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
+                                 CodePoint, Len, Loc, nullptr, Features, true);
+  if (!Parsed) {
+    HadError = true;
+    return 0;
+  }
+
+  if (CharByteWidth == 2) {
+    // UTF-16: code points above the BMP need a surrogate pair.
+    if (CodePoint <= 0xFFFF)
+      return 2;
+    return 4;
+  }
+
+  // UTF-8: the byte count grows with the code point value.
+  if (CodePoint >= 0x10000)
+    return 4;
+  if (CodePoint >= 0x800)
+    return 3;
+  if (CodePoint >= 0x80)
+    return 2;
+  return 1;
+}
+
+/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
+/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
+/// StringLiteralParser. When we decide to implement UCN's for identifiers,
+/// we will likely rework our support for UCN's.
+static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+                            const char *ThisTokEnd,
+                            char *&ResultBuf, bool &HadError,
+                            FullSourceLoc Loc, unsigned CharByteWidth,
+                            DiagnosticsEngine *Diags,
+                            const LangOptions &Features) {
+  typedef uint32_t UTF32;
+  UTF32 UcnVal = 0;
+  unsigned short UcnLen = 0;
+  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
+                        Loc, Diags, Features, true)) {
+    HadError = true;
+    return;
+  }
+
+  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
+         "only character widths of 1, 2, or 4 bytes supported");
+
+  (void)UcnLen;
+  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
+
+  if (CharByteWidth == 4) {
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
+    *ResultPtr = UcnVal;
+    ResultBuf += 4;
+    return;
+  }
+
+  if (CharByteWidth == 2) {
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);
+
+    // BMP code points fit a single UTF-16 code unit.
+    if (UcnVal <= (UTF32)0xFFFF) {
+      *ResultPtr = UcnVal;
+      ResultBuf += 2;
+      return;
+    }
+
+    // Convert to UTF16. Code points above the BMP become a surrogate pair.
+    UcnVal -= 0x10000;
+    *ResultPtr     = 0xD800 + (UcnVal >> 10);
+    *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
+    ResultBuf += 4;
+    return;
+  }
+
+  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
+
+  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
+  // The conversion below was inspired by:
+  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
+  // First, we determine how many bytes the result will require.
+  typedef uint8_t UTF8;
+
+  unsigned short bytesToWrite = 0;
+  if (UcnVal < (UTF32)0x80)
+    bytesToWrite = 1;
+  else if (UcnVal < (UTF32)0x800)
+    bytesToWrite = 2;
+  else if (UcnVal < (UTF32)0x10000)
+    bytesToWrite = 3;
+  else
+    bytesToWrite = 4;
+
+  const unsigned byteMask = 0xBF;
+  const unsigned byteMark = 0x80;
+
+  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
+  // into the first byte, depending on how many bytes follow.
+  static const UTF8 firstByteMark[5] = {
+    0x00, 0x00, 0xC0, 0xE0, 0xF0
+  };
+  // Finally, we write the bytes into ResultBuf, last byte first, walking
+  // backwards from past-the-end and consuming 6 bits per continuation byte.
+  ResultBuf += bytesToWrite;
+  switch (bytesToWrite) { // note: everything falls through.
+  case 4:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 3:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 2:
+    *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
+    LLVM_FALLTHROUGH;
+  case 1:
+    *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
+  }
+  // Update the buffer.
+  ResultBuf += bytesToWrite;
+}
+
+///       integer-constant: [C99 6.4.4.1]
+///         decimal-constant integer-suffix
+///         octal-constant integer-suffix
+///         hexadecimal-constant integer-suffix
+///         binary-literal integer-suffix [GNU, C++1y]
+///       user-defined-integer-literal: [C++11 lex.ext]
+///         decimal-literal ud-suffix
+///         octal-literal ud-suffix
+///         hexadecimal-literal ud-suffix
+///         binary-literal ud-suffix [GNU, C++1y]
+///       decimal-constant:
+///         nonzero-digit
+///         decimal-constant digit
+///       octal-constant:
+///         0
+///         octal-constant octal-digit
+///       hexadecimal-constant:
+///         hexadecimal-prefix hexadecimal-digit
+///         hexadecimal-constant hexadecimal-digit
+///       hexadecimal-prefix: one of
+///         0x 0X
+///       binary-literal:
+///         0b binary-digit
+///         0B binary-digit
+///         binary-literal binary-digit
+///       integer-suffix:
+///         unsigned-suffix [long-suffix]
+///         unsigned-suffix [long-long-suffix]
+///         long-suffix [unsigned-suffix]
+///         long-long-suffix [unsigned-sufix]
+///       nonzero-digit:
+///         1 2 3 4 5 6 7 8 9
+///       octal-digit:
+///         0 1 2 3 4 5 6 7
+///       hexadecimal-digit:
+///         0 1 2 3 4 5 6 7 8 9
+///         a b c d e f
+///         A B C D E F
+///       binary-digit:
+///         0
+///         1
+///       unsigned-suffix: one of
+///         u U
+///       long-suffix: one of
+///         l L
+///       long-long-suffix: one of
+///         ll LL
+///
+///       floating-constant: [C99 6.4.4.2]
+///         TODO: add rules...
+///
+NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
+                                           SourceLocation TokLoc,
+                                           Preprocessor &PP)
+  : PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
+
+  // This routine assumes that the range begin/end matches the regex for integer
+  // and FP constants (specifically, the 'pp-number' regex), and assumes that
+  // the byte at "*end" is both valid and not part of the regex.  Because of
+  // this, it doesn't have to check for 'overscan' in various places.
+  assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
+
+  // Start from a clean slate; the parse below sets the flags it recognizes.
+  s = DigitsBegin = ThisTokBegin;
+  saw_exponent = false;
+  saw_period = false;
+  saw_ud_suffix = false;
+  saw_fixed_point_suffix = false;
+  isLong = false;
+  isUnsigned = false;
+  isLongLong = false;
+  isHalf = false;
+  isFloat = false;
+  isImaginary = false;
+  isFloat16 = false;
+  isFloat128 = false;
+  MicrosoftInteger = 0;
+  isFract = false;
+  isAccum = false;
+  hadError = false;
+
+  if (*s == '0') { // parse radix
+    ParseNumberStartingWithZero(TokLoc);
+    if (hadError)
+      return;
+  } else { // the first digit is non-zero
+    radix = 10;
+    s = SkipDigits(s);
+    if (s == ThisTokEnd) {
+      // Done.
+    } else {
+      ParseDecimalOrOctalCommon(TokLoc);
+      if (hadError)
+        return;
+    }
+  }
+
+  SuffixBegin = s;
+  checkSeparator(TokLoc, s, CSK_AfterDigits);
+
+  // Initial scan to lookahead for fixed point suffix.
+  if (PP.getLangOpts().FixedPoint) {
+    for (const char *c = s; c != ThisTokEnd; ++c) {
+      if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
+        saw_fixed_point_suffix = true;
+        break;
+      }
+    }
+  }
+
+  // Parse the suffix.  At this point we can classify whether we have an FP or
+  // integer constant.
+  bool isFPConstant = isFloatingLiteral();
+
+  // Loop over all of the characters of the suffix.  If we see something bad,
+  // we break out of the loop.
+  for (; s != ThisTokEnd; ++s) {
+    switch (*s) {
+    case 'R':
+    case 'r':
+      if (!PP.getLangOpts().FixedPoint) break;
+      if (isFract || isAccum) break;
+      if (!(saw_period || saw_exponent)) break;
+      isFract = true;
+      continue;
+    case 'K':
+    case 'k':
+      if (!PP.getLangOpts().FixedPoint) break;
+      if (isFract || isAccum) break;
+      if (!(saw_period || saw_exponent)) break;
+      isAccum = true;
+      continue;
+    case 'h':      // FP Suffix for "half".
+    case 'H':
+      // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
+      if (!(PP.getLangOpts().Half || PP.getLangOpts().FixedPoint)) break;
+      if (isIntegerLiteral()) break;  // Error for integer constant.
+      if (isHalf || isFloat || isLong) break; // HH, FH, LH invalid.
+      isHalf = true;
+      continue;  // Success.
+    case 'f':      // FP Suffix for "float"
+    case 'F':
+      if (!isFPConstant) break;  // Error for integer constant.
+      if (isHalf || isFloat || isLong || isFloat128)
+        break; // HF, FF, LF, QF invalid.
+
+      // CUDA host and device may have different _Float16 support, therefore
+      // allows f16 literals to avoid false alarm.
+      // ToDo: more precise check for CUDA.
+      if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) &&
+          s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
+        s += 2; // success, eat up 2 characters.
+        isFloat16 = true;
+        continue;
+      }
+
+      isFloat = true;
+      continue;  // Success.
+    case 'q':    // FP Suffix for "__float128"
+    case 'Q':
+      if (!isFPConstant) break;  // Error for integer constant.
+      if (isHalf || isFloat || isLong || isFloat128)
+        break; // HQ, FQ, LQ, QQ invalid.
+      isFloat128 = true;
+      continue;  // Success.
+    case 'u':
+    case 'U':
+      if (isFPConstant) break;  // Error for floating constant.
+      if (isUnsigned) break;    // Cannot be repeated.
+      isUnsigned = true;
+      continue;  // Success.
+    case 'l':
+    case 'L':
+      if (isLong || isLongLong) break;  // Cannot be repeated.
+      if (isHalf || isFloat || isFloat128) break;     // LH, LF, LQ invalid.
+
+      // Check for long long.  The L's need to be adjacent and the same case.
+      if (s[1] == s[0]) {
+        assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
+        if (isFPConstant) break;        // long long invalid for floats.
+        isLongLong = true;
+        ++s;  // Eat both of them.
+      } else {
+        isLong = true;
+      }
+      continue;  // Success.
+    case 'i':
+    case 'I':
+      if (PP.getLangOpts().MicrosoftExt) {
+        if (isLong || isLongLong || MicrosoftInteger)
+          break;
+
+        if (!isFPConstant) {
+          // Allow i8, i16, i32, and i64.
+          switch (s[1]) {
+          case '8':
+            s += 2; // i8 suffix
+            MicrosoftInteger = 8;
+            break;
+          case '1':
+            if (s[2] == '6') {
+              s += 3; // i16 suffix
+              MicrosoftInteger = 16;
+            }
+            break;
+          case '3':
+            if (s[2] == '2') {
+              s += 3; // i32 suffix
+              MicrosoftInteger = 32;
+            }
+            break;
+          case '6':
+            if (s[2] == '4') {
+              s += 3; // i64 suffix
+              MicrosoftInteger = 64;
+            }
+            break;
+          default:
+            break;
+          }
+        }
+        if (MicrosoftInteger) {
+          assert(s <= ThisTokEnd && "didn't maximally munch?");
+          break;
+        }
+      }
+      // Not a Microsoft integer suffix: treat 'i'/'I' like 'j'/'J' below.
+      LLVM_FALLTHROUGH;
+    case 'j':
+    case 'J':
+      if (isImaginary) break;   // Cannot be repeated.
+      isImaginary = true;
+      continue;  // Success.
+    }
+    // If we reached here, there was an error or a ud-suffix.
+    break;
+  }
+
+  // "i", "if", and "il" are user-defined suffixes in C++1y.
+  if (s != ThisTokEnd || isImaginary) {
+    // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
+    expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
+    if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
+      if (!isImaginary) {
+        // Any suffix pieces we might have parsed are actually part of the
+        // ud-suffix.
+        isLong = false;
+        isUnsigned = false;
+        isLongLong = false;
+        isFloat = false;
+        isFloat16 = false;
+        isHalf = false;
+        isImaginary = false;
+        MicrosoftInteger = 0;
+        saw_fixed_point_suffix = false;
+        isFract = false;
+        isAccum = false;
+      }
+
+      saw_ud_suffix = true;
+      return;
+    }
+
+    if (s != ThisTokEnd) {
+      // Report an error if there are any.
+      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
+              diag::err_invalid_suffix_constant)
+          << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) << isFPConstant;
+      hadError = true;
+    }
+  }
+
+  if (!hadError && saw_fixed_point_suffix) {
+    assert(isFract || isAccum);
+  }
+}
+
+/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
+/// numbers. It issues an error for illegal digits, and handles floating point
+/// parsing. If it detects a floating point number, the radix is set to 10.
+void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
+  assert((radix == 8 || radix == 10) && "Unexpected radix");
+
+  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
+  // the code is using an incorrect base.
+  if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
+      !isValidUDSuffix(PP.getLangOpts(), StringRef(s, ThisTokEnd - s))) {
+    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
+            diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ?
1 : 0);
+    hadError = true;
+    return;
+  }
+
+  if (*s == '.') {
+    checkSeparator(TokLoc, s, CSK_AfterDigits);
+    s++;
+    // A period makes this a floating-point literal, which is always base 10.
+    radix = 10;
+    saw_period = true;
+    checkSeparator(TokLoc, s, CSK_BeforeDigits);
+    s = SkipDigits(s); // Skip suffix.
+  }
+  if (*s == 'e' || *s == 'E') { // exponent
+    checkSeparator(TokLoc, s, CSK_AfterDigits);
+    const char *Exponent = s;
+    s++;
+    radix = 10;
+    saw_exponent = true;
+    if (s != ThisTokEnd && (*s == '+' || *s == '-'))  s++; // sign
+    const char *first_non_digit = SkipDigits(s);
+    if (containsDigits(s, first_non_digit)) {
+      checkSeparator(TokLoc, s, CSK_BeforeDigits);
+      s = first_non_digit;
+    } else {
+      // An exponent marker with no digits after it is an error.
+      if (!hadError) {
+        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+                diag::err_exponent_has_no_digits);
+        hadError = true;
+      }
+      return;
+    }
+  }
+}
+
+/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
+/// suffixes as ud-suffixes, because the diagnostic experience is better if we
+/// treat it as an invalid suffix.
+bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
+                                           StringRef Suffix) {
+  if (!LangOpts.CPlusPlus11 || Suffix.empty())
+    return false;
+
+  // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
+  if (Suffix[0] == '_')
+    return true;
+
+  // In C++11, there are no library suffixes.
+  if (!LangOpts.CPlusPlus14)
+    return false;
+
+  // In C++14, "s", "h", "min", "ms", "us", and "ns" are used in the library.
+  // Per tweaked N3660, "il", "i", and "if" are also used in the library.
+  // In C++2a "d" and "y" are used in the library.
+  return llvm::StringSwitch<bool>(Suffix)
+      .Cases("h", "min", "s", true)
+      .Cases("ms", "us", "ns", true)
+      .Cases("il", "i", "if", true)
+      .Cases("d", "y", LangOpts.CPlusPlus2a)
+      .Default(false);
+}
+
+/// Diagnose a digit separator that is not between two digits. \p Pos is just
+/// after (CSK_AfterDigits) or just before (CSK_BeforeDigits) the position
+/// being checked.
+void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
+                                          const char *Pos,
+                                          CheckSeparatorKind IsAfterDigits) {
+  if (IsAfterDigits == CSK_AfterDigits) {
+    if (Pos == ThisTokBegin)
+      return;
+    --Pos;
+  } else if (Pos == ThisTokEnd)
+    return;
+
+  if (isDigitSeparator(*Pos)) {
+    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
+            diag::err_digit_separator_not_between_digits)
+      << IsAfterDigits;
+    hadError = true;
+  }
+}
+
+/// ParseNumberStartingWithZero - This method is called when the first character
+/// of the number is found to be a zero.  This means it is either an octal
+/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
+/// a floating point number (01239.123e4).  Eat the prefix, determining the
+/// radix etc.
+void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
+  assert(s[0] == '0' && "Invalid method call");
+  s++;
+
+  int c1 = s[0];
+
+  // Handle a hex number like 0x1234.
+  if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
+    s++;
+    assert(s < ThisTokEnd && "didn't maximally munch?");
+    radix = 16;
+    DigitsBegin = s;
+    s = SkipHexDigits(s);
+    bool HasSignificandDigits = containsDigits(DigitsBegin, s);
+    if (s == ThisTokEnd) {
+      // Done.
+    } else if (*s == '.') {
+      s++;
+      saw_period = true;
+      const char *floatDigitsBegin = s;
+      s = SkipHexDigits(s);
+      if (containsDigits(floatDigitsBegin, s))
+        HasSignificandDigits = true;
+      if (HasSignificandDigits)
+        checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
+    }
+
+    // A hex constant needs digits somewhere in the significand.
+    if (!HasSignificandDigits) {
+      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
+              diag::err_hex_constant_requires)
+          << PP.getLangOpts().CPlusPlus << 1;
+      hadError = true;
+      return;
+    }
+
+    // A binary exponent can appear with or with a '.'. If dotted, the
+    // binary exponent is required.
+    if (*s == 'p' || *s == 'P') {
+      checkSeparator(TokLoc, s, CSK_AfterDigits);
+      const char *Exponent = s;
+      s++;
+      saw_exponent = true;
+      if (s != ThisTokEnd && (*s == '+' || *s == '-'))  s++; // sign
+      const char *first_non_digit = SkipDigits(s);
+      if (!containsDigits(s, first_non_digit)) {
+        if (!hadError) {
+          PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+                  diag::err_exponent_has_no_digits);
+          hadError = true;
+        }
+        return;
+      }
+      checkSeparator(TokLoc, s, CSK_BeforeDigits);
+      s = first_non_digit;
+
+      if (!PP.getLangOpts().HexFloats)
+        PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
+                            ? diag::ext_hex_literal_invalid
+                            : diag::ext_hex_constant_invalid);
+      else if (PP.getLangOpts().CPlusPlus17)
+        PP.Diag(TokLoc, diag::warn_cxx17_hex_literal);
+    } else if (saw_period) {
+      // Dotted hex constant with no binary exponent: error.
+      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
+              diag::err_hex_constant_requires)
+          << PP.getLangOpts().CPlusPlus << 0;
+      hadError = true;
+    }
+    return;
+  }
+
+  // Handle simple binary numbers 0b01010
+  if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
+    // 0b101010 is a C++1y / GCC extension.
+    PP.Diag(TokLoc,
+            PP.getLangOpts().CPlusPlus14
+              ? diag::warn_cxx11_compat_binary_literal
+              : PP.getLangOpts().CPlusPlus
+                ? diag::ext_binary_literal_cxx14
+                : diag::ext_binary_literal);
+    ++s;
+    assert(s < ThisTokEnd && "didn't maximally munch?");
+    radix = 2;
+    DigitsBegin = s;
+    s = SkipBinaryDigits(s);
+    if (s == ThisTokEnd) {
+      // Done.
+    } else if (isHexDigit(*s) &&
+               !isValidUDSuffix(PP.getLangOpts(),
+                                StringRef(s, ThisTokEnd - s))) {
+      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
+              diag::err_invalid_digit) << StringRef(s, 1) << 2;
+      hadError = true;
+    }
+    // Other suffixes will be diagnosed by the caller.
+    return;
+  }
+
+  // For now, the radix is set to 8. If we discover that we have a
+  // floating point constant, the radix will change to 10. Octal floating
+  // point constants are not permitted (only decimal and hexadecimal).
+  radix = 8;
+  DigitsBegin = s;
+  s = SkipOctalDigits(s);
+  if (s == ThisTokEnd)
+    return; // Done, simple octal number like 01234
+
+  // If we have some other non-octal digit that *is* a decimal digit, see if
+  // this is part of a floating point number like 094.123 or 09e1.
+  if (isDigit(*s)) {
+    const char *EndDecimal = SkipDigits(s);
+    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
+      s = EndDecimal;
+      radix = 10;
+    }
+  }
+
+  ParseDecimalOrOctalCommon(TokLoc);
+}
+
+/// Return true when \p NumDigits digits in base \p Radix can never exceed
+/// the range of a uint64_t, so no overflow checking is needed.
+static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
+  switch (Radix) {
+  case 2:
+    return NumDigits <= 64;
+  case 8:
+    return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
+  case 10:
+    return NumDigits <= 19; // floor(log10(2^64))
+  case 16:
+    return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
+  default:
+    llvm_unreachable("impossible Radix");
+  }
+}
+
+/// GetIntegerValue - Convert this numeric literal value to an APInt that
+/// matches Val's input width.  If there is an overflow, set Val to the low bits
+/// of the result and return true.  Otherwise, return false.
+bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
+  // Fast path: Compute a conservative bound on the maximum number of
+  // bits per digit in this radix. If we can't possibly overflow a
+  // uint64 based on that bound then do the simple conversion to
+  // integer. This avoids the expensive overflow checking below, and
+  // handles the common cases that matter (small decimal integers and
+  // hex/octal values which don't overflow).
+  const unsigned NumDigits = SuffixBegin - DigitsBegin;
+  if (alwaysFitsInto64Bits(radix, NumDigits)) {
+    uint64_t N = 0;
+    for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
+      if (!isDigitSeparator(*Ptr))
+        N = N * radix + llvm::hexDigitValue(*Ptr);
+
+    // This will truncate the value to Val's input width. Simply check
+    // for overflow by comparing.
+    Val = N;
+    return Val.getZExtValue() != N;
+  }
+
+  // Slow path: accumulate in APInt arithmetic, checking each step.
+  Val = 0;
+  const char *Ptr = DigitsBegin;
+
+  llvm::APInt RadixVal(Val.getBitWidth(), radix);
+  llvm::APInt CharVal(Val.getBitWidth(), 0);
+  llvm::APInt OldVal = Val;
+
+  bool OverflowOccurred = false;
+  while (Ptr < SuffixBegin) {
+    if (isDigitSeparator(*Ptr)) {
+      ++Ptr;
+      continue;
+    }
+
+    unsigned C = llvm::hexDigitValue(*Ptr++);
+
+    // If this letter is out of bound for this radix, reject it.
+    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+    CharVal = C;
+
+    // Add the digit to the value in the appropriate radix.  If adding in digits
+    // made the value smaller, then this overflowed.
+    OldVal = Val;
+
+    // Multiply by radix, did overflow occur on the multiply?
+    Val *= RadixVal;
+    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
+
+    // Add value, did overflow occur on the value?
+    //   (a + b) ult b  <=> overflow
+    Val += CharVal;
+    OverflowOccurred |= Val.ult(CharVal);
+  }
+  return OverflowOccurred;
+}
+
+llvm::APFloat::opStatus
+NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
+  using llvm::APFloat;
+
+  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
+
+  // Strip digit separators before handing the text to APFloat.
+  llvm::SmallString<16> Buffer;
+  StringRef Str(ThisTokBegin, n);
+  if (Str.find('\'') != StringRef::npos) {
+    Buffer.reserve(n);
+    std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
+                        &isDigitSeparator);
+    Str = Buffer;
+  }
+
+  return Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
+}
+
+static inline bool IsExponentPart(char c) {
+  return c == 'p' || c == 'P' || c == 'e' || c == 'E';
+}
+
+bool NumericLiteralParser::GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale) {
+  assert(radix == 16 || radix == 10);
+
+  // Find how many digits are needed to store the whole literal.
+  unsigned NumDigits = SuffixBegin - DigitsBegin;
+  if (saw_period) --NumDigits;
+
+  // Initial scan of the exponent if it exists
+  bool ExpOverflowOccurred = false;
+  bool NegativeExponent = false;
+  const char *ExponentBegin;
+  uint64_t Exponent = 0;
+  int64_t BaseShift = 0;
+  if (saw_exponent) {
+    const char *Ptr = DigitsBegin;
+
+    while (!IsExponentPart(*Ptr)) ++Ptr;
+    ExponentBegin = Ptr;
+    ++Ptr;
+    NegativeExponent = *Ptr == '-';
+    if (NegativeExponent) ++Ptr;
+
+    unsigned NumExpDigits = SuffixBegin - Ptr;
+    if (alwaysFitsInto64Bits(radix, NumExpDigits)) {
+      llvm::StringRef ExpStr(Ptr, NumExpDigits);
+      llvm::APInt ExpInt(/*numBits=*/64, ExpStr, /*radix=*/10);
+      Exponent = ExpInt.getZExtValue();
+    } else {
+      ExpOverflowOccurred = true;
+    }
+
+    if (NegativeExponent) BaseShift -= Exponent;
+    else BaseShift += Exponent;
+  }
+
+  // Number of bits needed for decimal literal is
+  //   ceil(NumDigits * log2(10))       Integral part
+  // + Scale                            Fractional part
+  // + ceil(Exponent * log2(10))        Exponent
+  // --------------------------------------------------
+  //   ceil((NumDigits + Exponent) * log2(10)) + Scale
+  //
+  // But for simplicity in handling integers, we can round up log2(10) to 4,
+  // making:
+  // 4 * (NumDigits + Exponent) + Scale
+  //
+  // Number of digits needed for hexadecimal literal is
+  //   4 * NumDigits                    Integral part
+  // + Scale                            Fractional part
+  // + Exponent                         Exponent
+  // --------------------------------------------------
+  //   (4 * NumDigits) + Scale + Exponent
+  uint64_t NumBitsNeeded;
+  if (radix == 10)
+    NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
+  else
+    NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
+
+  if (NumBitsNeeded > std::numeric_limits<unsigned>::max())
+    ExpOverflowOccurred = true;
+  llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, /*isSigned=*/false);
+
+  bool FoundDecimal = false;
+
+  int64_t FractBaseShift = 0;
+  const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
+  for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
+    if (*Ptr == '.') {
+      FoundDecimal = true;
+      continue;
+    }
+
+    // Normal reading of an integer
+    unsigned C = llvm::hexDigitValue(*Ptr);
+    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+    Val *= radix;
+    Val += C;
+
+    if (FoundDecimal)
+      // Keep track of how much we will need to adjust this value by from the
+      // number of digits past the radix point.
+      --FractBaseShift;
+  }
+
+  // For a radix of 16, we will be multiplying by 2 instead of 16.
+  if (radix == 16) FractBaseShift *= 4;
+  BaseShift += FractBaseShift;
+
+  // Shift into the fixed-point scale before applying the exponent.
+  Val <<= Scale;
+
+  // BaseShift > 0 scales the value up; < 0 scales it down, stopping once the
+  // value reaches zero.
+  uint64_t Base = (radix == 16) ? 2 : 10;
+  if (BaseShift > 0) {
+    for (int64_t i = 0; i < BaseShift; ++i) {
+      Val *= Base;
+    }
+  } else if (BaseShift < 0) {
+    for (int64_t i = BaseShift; i < 0 && !Val.isNullValue(); ++i)
+      Val = Val.udiv(Base);
+  }
+
+  // Fit the computed value into StoreVal's width, noting overflow.
+  bool IntOverflowOccurred = false;
+  auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
+  if (Val.getBitWidth() > StoreVal.getBitWidth()) {
+    IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
+    StoreVal = Val.trunc(StoreVal.getBitWidth());
+  } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
+    IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
+    StoreVal = Val.zext(StoreVal.getBitWidth());
+  } else {
+    StoreVal = Val;
+  }
+
+  return IntOverflowOccurred || ExpOverflowOccurred;
+}
+
+/// \verbatim
+///       user-defined-character-literal: [C++11 lex.ext]
+///         character-literal ud-suffix
+///       ud-suffix:
+///         identifier
+///       character-literal: [C++11 lex.ccon]
+///         ' c-char-sequence '
+///         u' c-char-sequence '
+///        
///         U' c-char-sequence '
///         L' c-char-sequence '
///         u8' c-char-sequence ' [C++1z lex.ccon]
///       c-char-sequence:
///         c-char
///         c-char-sequence c-char
///       c-char:
///         any member of the source character set except the single-quote ',
///           backslash \, or new-line character
///         escape-sequence
///         universal-character-name
///       escape-sequence:
///         simple-escape-sequence
///         octal-escape-sequence
///         hexadecimal-escape-sequence
///       simple-escape-sequence:
///         one of \' \" \? \\ \a \b \f \n \r \t \v
///       octal-escape-sequence:
///         \ octal-digit
///         \ octal-digit octal-digit
///         \ octal-digit octal-digit octal-digit
///       hexadecimal-escape-sequence:
///         \x hexadecimal-digit
///         hexadecimal-escape-sequence hexadecimal-digit
///       universal-character-name: [C++11 lex.charset]
///         \u hex-quad
///         \U hex-quad hex-quad
///       hex-quad:
///         hex-digit hex-digit hex-digit hex-digit
/// \endverbatim
///
/// Parse the spelling [begin, end) of a (possibly prefixed, possibly
/// ud-suffixed) character literal, recording its value, multi-char status,
/// and any errors via the preprocessor's diagnostics.
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
                                     SourceLocation Loc, Preprocessor &PP,
                                     tok::TokenKind kind) {
  // At this point we know that the character matches the regex "(L|u|U)?'.*'".
  HadError = false;

  Kind = kind;

  const char *TokBegin = begin;

  // Skip over wide character determinant (L, u, U; u8 is two bytes).
  if (Kind != tok::char_constant)
    ++begin;
  if (Kind == tok::utf8_char_constant)
    ++begin;

  // Skip over the entry quote.
  assert(begin[0] == '\'' && "Invalid token lexed");
  ++begin;

  // Remove an optional ud-suffix: back up from the end to the closing quote.
  if (end[-1] != '\'') {
    const char *UDSuffixEnd = end;
    do {
      --end;
    } while (end[-1] != '\'');
    // FIXME: Don't bother with this if !tok.hasUCN().
    expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
    UDSuffixOffset = end - TokBegin;
  }

  // Trim the ending quote.
  assert(end != begin && "Invalid token lexed");
  --end;

  // FIXME: The "Value" is an uint64_t so we can handle char literals of
  // up to 64-bits.
  // FIXME: This extensively assumes that 'char' is 8-bits.
  assert(PP.getTargetInfo().getCharWidth() == 8 &&
         "Assumes char is 8 bits");
  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
         "Assumes sizeof(wchar) on target is <= 64");

  // Decode the literal body into a buffer of code points; [begin, end) bytes
  // can never decode to more than (end - begin) code points.
  SmallVector<uint32_t, 4> codepoint_buffer;
  codepoint_buffer.resize(end - begin);
  uint32_t *buffer_begin = &codepoint_buffer.front();
  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();

  // Unicode escapes representing characters that cannot be correctly
  // represented in a single code unit are disallowed in character literals
  // by this implementation.  Pick the largest code point representable for
  // this literal kind.
  uint32_t largest_character_for_kind;
  if (tok::wide_char_constant == Kind) {
    largest_character_for_kind =
        0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
  } else if (tok::utf8_char_constant == Kind) {
    largest_character_for_kind = 0x7F;
  } else if (tok::utf16_char_constant == Kind) {
    largest_character_for_kind = 0xFFFF;
  } else if (tok::utf32_char_constant == Kind) {
    largest_character_for_kind = 0x10FFFF;
  } else {
    largest_character_for_kind = 0x7Fu;
  }

  while (begin != end) {
    // Is this a span of non-escape characters?
    if (begin[0] != '\\') {
      char const *start = begin;
      do {
        ++begin;
      } while (begin != end && *begin != '\\');

      char const *tmp_in_start = start;
      uint32_t *tmp_out_start = buffer_begin;
      llvm::ConversionResult res =
          llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
                             reinterpret_cast<llvm::UTF8 const *>(begin),
                             &buffer_begin, buffer_end, llvm::strictConversion);
      if (res != llvm::conversionOK) {
        // If we see bad encoding for unprefixed character literals, warn and
        // simply copy the byte values, for compatibility with gcc and
        // older versions of clang.
        bool NoErrorOnBadEncoding = isAscii();
        unsigned Msg = diag::err_bad_character_encoding;
        if (NoErrorOnBadEncoding)
          Msg = diag::warn_bad_character_encoding;
        PP.Diag(Loc, Msg);
        if (NoErrorOnBadEncoding) {
          // Rewind and copy the raw bytes through unchanged.
          start = tmp_in_start;
          buffer_begin = tmp_out_start;
          for (; start != begin; ++start, ++buffer_begin)
            *buffer_begin = static_cast<uint8_t>(*start);
        } else {
          HadError = true;
        }
      } else {
        // Conversion succeeded; verify each decoded code point fits the kind.
        for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
          if (*tmp_out_start > largest_character_for_kind) {
            HadError = true;
            PP.Diag(Loc, diag::err_character_too_large);
          }
        }
      }

      continue;
    }
    // Is this a Universal Character Name escape?
    if (begin[1] == 'u' || begin[1] == 'U') {
      unsigned short UcnLen = 0;
      if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
                            FullSourceLoc(Loc, PP.getSourceManager()),
                            &PP.getDiagnostics(), PP.getLangOpts(), true)) {
        HadError = true;
      } else if (*buffer_begin > largest_character_for_kind) {
        HadError = true;
        PP.Diag(Loc, diag::err_character_too_large);
      }

      ++buffer_begin;
      continue;
    }
    // Otherwise it is an ordinary escape sequence (\n, \x.., \0.., ...).
    unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
    uint64_t result =
      ProcessCharEscape(TokBegin, begin, end, HadError,
                        FullSourceLoc(Loc,PP.getSourceManager()),
                        CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
    *buffer_begin++ = result;
  }

  unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();

  // Diagnose multi-character literals; only narrow ('ab') literals are an
  // extension, wide literals warn and UTF literals are an error.
  if (NumCharsSoFar > 1) {
    if (isWide())
      PP.Diag(Loc, diag::warn_extraneous_char_constant);
    else if (isAscii() && NumCharsSoFar == 4)
      PP.Diag(Loc, diag::ext_four_char_character_literal);
    else if (isAscii())
      PP.Diag(Loc, diag::ext_multichar_character_literal);
    else
      PP.Diag(Loc, diag::err_multichar_utf_character_literal);
    IsMultiChar = true;
  } else {
    IsMultiChar = false;
  }

  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);

  // Narrow character literals act as though their value is concatenated
  // in this implementation, but warn on overflow.
  bool multi_char_too_long = false;
  if (isAscii() && isMultiChar()) {
    LitVal = 0;
    for (size_t i = 0; i < NumCharsSoFar; ++i) {
      // check for enough leading zeros to shift into
      multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
      LitVal <<= 8;
      LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
    }
  } else if (NumCharsSoFar > 0) {
    // otherwise just take the last character
    LitVal = buffer_begin[-1];
  }

  if (!HadError && multi_char_too_long) {
    PP.Diag(Loc, diag::warn_char_constant_too_large);
  }

  // Transfer the value from APInt to uint64_t
  Value = LitVal.getZExtValue();

  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
  // character constants are not sign extended in the this implementation:
  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
      PP.getLangOpts().CharIsSigned)
    Value = (signed char)Value;
}

/// \verbatim
///       string-literal: [C++0x lex.string]
///         encoding-prefix " [s-char-sequence] "
///         encoding-prefix R raw-string
///       encoding-prefix:
///         u8
///         u
///         U
///         L
///       s-char-sequence:
///         s-char
///         s-char-sequence s-char
///       s-char:
///         any member of the source character set except the double-quote ",
///           backslash \, or new-line character
///         escape-sequence
///         universal-character-name
///       raw-string:
///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
///       r-char-sequence:
///         r-char
///         r-char-sequence r-char
///       r-char:
///         any member of the source character set, except a right parenthesis )
///           followed by the initial d-char-sequence (which may be empty)
///
///           followed by a double quote ".
///       d-char-sequence:
///         d-char
///         d-char-sequence d-char
///       d-char:
///         any member of the basic source character set except:
///           space, the left parenthesis (, the right parenthesis ),
///           the backslash \, and the control characters representing horizontal
///           tab, vertical tab, form feed, and newline.
///       escape-sequence: [C++0x lex.ccon]
///         simple-escape-sequence
///         octal-escape-sequence
///         hexadecimal-escape-sequence
///       simple-escape-sequence:
///         one of \' \" \? \\ \a \b \f \n \r \t \v
///       octal-escape-sequence:
///         \ octal-digit
///         \ octal-digit octal-digit
///         \ octal-digit octal-digit octal-digit
///       hexadecimal-escape-sequence:
///         \x hexadecimal-digit
///         hexadecimal-escape-sequence hexadecimal-digit
///       universal-character-name:
///         \u hex-quad
///         \U hex-quad hex-quad
///       hex-quad:
///         hex-digit hex-digit hex-digit hex-digit
/// \endverbatim
///
StringLiteralParser::
StringLiteralParser(ArrayRef<Token> StringToks,
                    Preprocessor &PP, bool Complain)
  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
  init(StringToks);
}

/// Concatenate (translation phase 6) and decode the given sequence of
/// string-literal tokens into ResultBuf, diagnosing encoding, ud-suffix and
/// length problems along the way.
void StringLiteralParser::init(ArrayRef<Token> StringToks){
  // The literal token may have come from an invalid source location (e.g. due
  // to a PCH error), in which case the token length will be 0.
  if (StringToks.empty() || StringToks[0].getLength() < 2)
    return DiagnoseLexingError(SourceLocation());

  // Scan all of the string portions, remember the max individual token length,
  // computing a bound on the concatenated string length, and see whether any
  // piece is a wide-string.  If any of the string portions is a wide-string
  // literal, the result is a wide-string literal [C99 6.4.5p4].
  assert(!StringToks.empty() && "expected at least one token");
  MaxTokenLength = StringToks[0].getLength();
  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
  Kind = StringToks[0].getKind();

  hadError = false;

  // Implement Translation Phase #6: concatenation of string literals
  // (C99 5.1.1.2p1).  The common case is only one string fragment.
  for (unsigned i = 1; i != StringToks.size(); ++i) {
    if (StringToks[i].getLength() < 2)
      return DiagnoseLexingError(StringToks[i].getLocation());

    // The string could be shorter than this if it needs cleaning, but this is a
    // reasonable bound, which is all we need.
    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
    SizeBound += StringToks[i].getLength()-2;  // -2 for "".

    // Remember maximum string piece length.
    if (StringToks[i].getLength() > MaxTokenLength)
      MaxTokenLength = StringToks[i].getLength();

    // Remember if we see any wide or utf-8/16/32 strings.
    // Also check for illegal concatenations.
    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
      if (isAscii()) {
        // A narrow piece adopts the kind of any prefixed piece it meets.
        Kind = StringToks[i].getKind();
      } else {
        // Two different non-narrow kinds cannot be concatenated.
        if (Diags)
          Diags->Report(StringToks[i].getLocation(),
                        diag::err_unsupported_string_concat);
        hadError = true;
      }
    }
  }

  // Include space for the null terminator.
  ++SizeBound;

  // TODO: K&R warning: "traditional C rejects string constant concatenation"

  // Get the width in bytes of char/wchar_t/char16_t/char32_t
  CharByteWidth = getCharWidth(Kind, Target);
  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
  CharByteWidth /= 8;

  // The output buffer size needs to be large enough to hold wide characters.
  // This is a worst-case assumption which basically corresponds to L"" "long".
  SizeBound *= CharByteWidth;

  // Size the temporary buffer to hold the result string data.
  ResultBuf.resize(SizeBound);

  // Likewise, but for each string piece.
  SmallString<512> TokenBuf;
  TokenBuf.resize(MaxTokenLength);

  // Loop over all the strings, getting their spelling, and expanding them to
  // wide strings as appropriate.
  ResultPtr = &ResultBuf[0];   // Next byte to fill in.

  Pascal = false;

  SourceLocation UDSuffixTokLoc;

  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
    const char *ThisTokBuf = &TokenBuf[0];
    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
    // that ThisTokBuf points to a buffer that is big enough for the whole token
    // and 'spelled' tokens can only shrink.
    bool StringInvalid = false;
    unsigned ThisTokLen =
      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
                         &StringInvalid);
    if (StringInvalid)
      return DiagnoseLexingError(StringToks[i].getLocation());

    const char *ThisTokBegin = ThisTokBuf;
    const char *ThisTokEnd = ThisTokBuf+ThisTokLen;

    // Remove an optional ud-suffix.
    if (ThisTokEnd[-1] != '"') {
      const char *UDSuffixEnd = ThisTokEnd;
      do {
        --ThisTokEnd;
      } while (ThisTokEnd[-1] != '"');

      StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);

      if (UDSuffixBuf.empty()) {
        // First suffix seen: remember it (UCN-expanded) and where it was.
        if (StringToks[i].hasUCN())
          expandUCNs(UDSuffixBuf, UDSuffix);
        else
          UDSuffixBuf.assign(UDSuffix);
        UDSuffixToken = i;
        UDSuffixOffset = ThisTokEnd - ThisTokBuf;
        UDSuffixTokLoc = StringToks[i].getLocation();
      } else {
        SmallString<32> ExpandedUDSuffix;
        if (StringToks[i].hasUCN()) {
          expandUCNs(ExpandedUDSuffix, UDSuffix);
          UDSuffix = ExpandedUDSuffix;
        }

        // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
        // result of a concatenation involving at least one user-defined-string-
        // literal, all the participating user-defined-string-literals shall
        // have the same ud-suffix.
        if (UDSuffixBuf != UDSuffix) {
          if (Diags) {
            SourceLocation TokLoc = StringToks[i].getLocation();
            Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
              << UDSuffixBuf << UDSuffix
              << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
              << SourceRange(TokLoc, TokLoc);
          }
          hadError = true;
        }
      }
    }

    // Strip the end quote.
    --ThisTokEnd;

    // TODO: Input character set mapping support.

    // Skip marker for wide or unicode strings.
    if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
      ++ThisTokBuf;
      // Skip 8 of u8 marker for utf8 strings.
      if (ThisTokBuf[0] == '8')
        ++ThisTokBuf;
    }

    // Check for raw string
    if (ThisTokBuf[0] == 'R') {
      ThisTokBuf += 2; // skip R"

      // Measure the d-char-sequence prefix up to the '('.
      const char *Prefix = ThisTokBuf;
      while (ThisTokBuf[0] != '(')
        ++ThisTokBuf;
      ++ThisTokBuf; // skip '('

      // Remove same number of characters from the end
      ThisTokEnd -= ThisTokBuf - Prefix;
      assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");

      // C++14 [lex.string]p4: A source-file new-line in a raw string literal
      // results in a new-line in the resulting execution string-literal.
      StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
      while (!RemainingTokenSpan.empty()) {
        // Split the string literal on \r\n boundaries.
        size_t CRLFPos = RemainingTokenSpan.find("\r\n");
        StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
        StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);

        // Copy everything before the \r\n sequence into the string literal.
        if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
          hadError = true;

        // Point into the \n inside the \r\n sequence and operate on the
        // remaining portion of the literal.
        RemainingTokenSpan = AfterCRLF.substr(1);
      }
    } else {
      if (ThisTokBuf[0] != '"') {
        // The file may have come from PCH and then changed after loading the
        // PCH; Fail gracefully.
        return DiagnoseLexingError(StringToks[i].getLocation());
      }
      ++ThisTokBuf; // skip "

      // Check if this is a pascal string
      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {

        // If the \p sequence is found in the first token, we have a pascal string
        // Otherwise, if we already have a pascal string, ignore the first \p
        if (i == 0) {
          ++ThisTokBuf;
          Pascal = true;
        } else if (Pascal)
          ThisTokBuf += 2;
      }

      while (ThisTokBuf != ThisTokEnd) {
        // Is this a span of non-escape characters?
        if (ThisTokBuf[0] != '\\') {
          const char *InStart = ThisTokBuf;
          do {
            ++ThisTokBuf;
          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

          // Copy the character span over.
          if (CopyStringFragment(StringToks[i], ThisTokBegin,
                                 StringRef(InStart, ThisTokBuf - InStart)))
            hadError = true;
          continue;
        }
        // Is this a Universal Character Name escape?
        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
          EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
                          ResultPtr, hadError,
                          FullSourceLoc(StringToks[i].getLocation(), SM),
                          CharByteWidth, Diags, Features);
          continue;
        }
        // Otherwise, this is a non-UCN escape character.  Process it.
        unsigned ResultChar =
          ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
                            FullSourceLoc(StringToks[i].getLocation(), SM),
                            CharByteWidth*8, Diags, Features);

        // Emit the decoded escape at the target's code-unit width.
        if (CharByteWidth == 4) {
          // FIXME: Make the type of the result buffer correct instead of
          // using reinterpret_cast.
          llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
          *ResultWidePtr = ResultChar;
          ResultPtr += 4;
        } else if (CharByteWidth == 2) {
          // FIXME: Make the type of the result buffer correct instead of
          // using reinterpret_cast.
          llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
          *ResultWidePtr = ResultChar & 0xFFFF;
          ResultPtr += 2;
        } else {
          assert(CharByteWidth == 1 && "Unexpected char width");
          *ResultPtr++ = ResultChar & 0xFF;
        }
      }
    }
  }

  if (Pascal) {
    // Store the length in the leading code unit (Pascal-string convention).
    if (CharByteWidth == 4) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
      llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
      ResultWidePtr[0] = GetNumStringChars() - 1;
    } else if (CharByteWidth == 2) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
      llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
      ResultWidePtr[0] = GetNumStringChars() - 1;
    } else {
      assert(CharByteWidth == 1 && "Unexpected char width");
      ResultBuf[0] = GetNumStringChars() - 1;
    }

    // Verify that pascal strings aren't too large.
    if (GetStringLength() > 256) {
      if (Diags)
        Diags->Report(StringToks.front().getLocation(),
                      diag::err_pascal_string_too_long)
          << SourceRange(StringToks.front().getLocation(),
                         StringToks.back().getLocation());
      hadError = true;
      return;
    }
  } else if (Diags) {
    // Complain if this string literal has too many characters.
    // These are the minimum guaranteed limits of the relevant standards.
    unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;

    if (GetNumStringChars() > MaxChars)
      Diags->Report(StringToks.front().getLocation(),
                    diag::ext_string_too_long)
        << GetNumStringChars() << MaxChars
        << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
        << SourceRange(StringToks.front().getLocation(),
                       StringToks.back().getLocation());
  }
}

/// Skip past an ill-formed UTF-8 sequence starting at \p Err: advance to the
/// next byte that could begin a new code point (a non-continuation byte),
/// without walking past \p End.
static const char *resyncUTF8(const char *Err, const char *End) {
  if (Err == End)
    return End;
  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
  while (++Err != End && (*Err & 0xC0) == 0x80)
    ;
  return Err;
}

/// This function copies from Fragment, which is a sequence of bytes
/// within Tok's contents (which begin at TokBegin) into ResultPtr.
/// Performs widening for multi-byte characters.
///
/// \returns true if a hard (non-recoverable) encoding error occurred; bad
/// encoding in unprefixed literals is only warned about and the raw bytes are
/// copied through.
bool StringLiteralParser::CopyStringFragment(const Token &Tok,
                                             const char *TokBegin,
                                             StringRef Fragment) {
  const llvm::UTF8 *ErrorPtrTmp;
  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
    return false;

  // If we see bad encoding for unprefixed string literals, warn and
  // simply copy the byte values, for compatibility with gcc and older
  // versions of clang.
  bool NoErrorOnBadEncoding = isAscii();
  if (NoErrorOnBadEncoding) {
    memcpy(ResultPtr, Fragment.data(), Fragment.size());
    ResultPtr += Fragment.size();
  }

  if (Diags) {
    const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);

    FullSourceLoc SourceLoc(Tok.getLocation(), SM);
    const DiagnosticBuilder &Builder =
      Diag(Diags, Features, SourceLoc, TokBegin,
           ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
           NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
                                : diag::err_bad_string_encoding);

    const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
    StringRef NextFragment(NextStart, Fragment.end()-NextStart);

    // Decode into a dummy buffer.  This loop only exists to attach a source
    // range for every additional ill-formed sequence to the one diagnostic.
    SmallString<512> Dummy;
    Dummy.reserve(Fragment.size() * CharByteWidth);
    char *Ptr = Dummy.data();

    while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
      const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
      NextStart = resyncUTF8(ErrorPtr, Fragment.end());
      Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
                                     ErrorPtr, NextStart);
      NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
    }
  }
  return !NoErrorOnBadEncoding;
}

/// Record that lexing this literal failed and report err_lexing_string if
/// diagnostics are enabled.
void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
  hadError = true;
  if (Diags)
    Diags->Report(Loc, diag::err_lexing_string);
}

/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token.  This handles
/// advancing over escape sequences in the string.
unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
                                                    unsigned ByteNo) const {
  // Get the spelling of the token.
  SmallString<32> SpellingBuffer;
  SpellingBuffer.resize(Tok.getLength());

  bool StringInvalid = false;
  const char *SpellingPtr = &SpellingBuffer[0];
  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
                                       &StringInvalid);
  if (StringInvalid)
    return 0;

  const char *SpellingStart = SpellingPtr;
  const char *SpellingEnd = SpellingPtr+TokLen;

  // Handle UTF-8 strings just like narrow strings.
  if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
    SpellingPtr += 2;

  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");

  // For raw string literals, this is easy.  No escapes exist, so the byte
  // offset maps directly to a spelling offset past the delimiter.
  if (SpellingPtr[0] == 'R') {
    assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
    // Skip 'R"'.
    SpellingPtr += 2;
    while (*SpellingPtr != '(') {
      ++SpellingPtr;
      assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
    }
    // Skip '('.
    ++SpellingPtr;
    return SpellingPtr - SpellingStart + ByteNo;
  }

  // Skip over the leading quote
  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
  ++SpellingPtr;

  // Skip over bytes until we find the offset we're looking for.
  while (ByteNo) {
    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");

    // Step over non-escapes simply.
    if (*SpellingPtr != '\\') {
      ++SpellingPtr;
      --ByteNo;
      continue;
    }

    // Otherwise, this is an escape character.  Advance over it.
    bool HadError = false;
    if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
      const char *EscapePtr = SpellingPtr;
      // A UCN may encode to several bytes (Len); if ByteNo lands inside the
      // encoded sequence, report the start of the escape itself.
      unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
                                      1, Features, HadError);
      if (Len > ByteNo) {
        // ByteNo is somewhere within the escape sequence.
        SpellingPtr = EscapePtr;
        break;
      }
      ByteNo -= Len;
    } else {
      // Non-UCN escapes always produce exactly one byte; ProcessCharEscape is
      // called only to advance SpellingPtr past the escape's spelling.
      ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
                        FullSourceLoc(Tok.getLocation(), SM),
                        CharByteWidth*8, Diags, Features);
      --ByteNo;
    }
    assert(!HadError && "This method isn't valid on erroneous strings");
  }

  return SpellingPtr-SpellingStart;
}

/// Determine whether a suffix is a valid ud-suffix.
/// We avoid treating reserved
/// suffixes as ud-suffixes, because the diagnostic experience is better if we
/// treat it as an invalid suffix.
bool StringLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
                                          StringRef Suffix) {
  // "sv" (the standard string_view literal suffix) is accepted in addition to
  // the numeric-literal suffixes.
  return NumericLiteralParser::isValidUDSuffix(LangOpts, Suffix) ||
         Suffix == "sv";
}
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
new file mode 100644
index 000000000000..7ede00b4aa64
--- /dev/null
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -0,0 +1,307 @@
//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the MacroArgs interface.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>

using namespace clang;

/// MacroArgs ctor function - This destroys the vector passed in.
MacroArgs *MacroArgs::create(const MacroInfo *MI,
                             ArrayRef<Token> UnexpArgTokens,
                             bool VarargsElided, Preprocessor &PP) {
  assert(MI->isFunctionLike() &&
         "Can't have args for an object-like macro!");
  MacroArgs **ResultEnt = nullptr;
  unsigned ClosestMatch = ~0U;

  // See if we have an entry with a big enough argument list to reuse on the
  // free list.  If so, reuse it.
  // Walk the preprocessor's free list looking for the smallest cached
  // MacroArgs whose token array is still large enough for this call.
  for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
       Entry = &(*Entry)->ArgCache) {
    if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
        (*Entry)->NumUnexpArgTokens < ClosestMatch) {
      ResultEnt = Entry;

      // If we have an exact match, use it.
      if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
        break;
      // Otherwise, use the best fit.
      ClosestMatch = (*Entry)->NumUnexpArgTokens;
    }
  }
  MacroArgs *Result;
  if (!ResultEnt) {
    // Allocate memory for a MacroArgs object with the lexer tokens at the end,
    // and construct the MacroArgs object.
    Result = new (
        llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
        MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
  } else {
    Result = *ResultEnt;
    // Unlink this node from the preprocessors singly linked list.
    *ResultEnt = Result->ArgCache;
    Result->NumUnexpArgTokens = UnexpArgTokens.size();
    Result->VarargsElided = VarargsElided;
    Result->NumMacroArgs = MI->getNumParams();
  }

  // Copy the actual unexpanded tokens to immediately after the result ptr.
  if (!UnexpArgTokens.empty()) {
    static_assert(std::is_trivial<Token>::value,
                  "assume trivial copyability if copying into the "
                  "uninitialized array (as opposed to reusing a cached "
                  "MacroArgs)");
    std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
              Result->getTrailingObjects<Token>());
  }

  return Result;
}

/// destroy - Destroy and deallocate the memory for this object.
///
void MacroArgs::destroy(Preprocessor &PP) {
  // Don't clear PreExpArgTokens, just clear the entries.  Clearing the entries
  // would deallocate the element vectors.
  for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
    PreExpArgTokens[i].clear();

  // Add this to the preprocessor's free list.
  ArgCache = PP.MacroArgCache;
  PP.MacroArgCache = this;
}

/// deallocate - This should only be called by the Preprocessor when managing
/// its freelist.
MacroArgs *MacroArgs::deallocate() {
  MacroArgs *Next = ArgCache;

  // Run the dtor to deallocate the vectors.
  this->~MacroArgs();
  // Release the memory for the object.
  static_assert(std::is_trivially_destructible<Token>::value,
                "assume trivially destructible and forego destructors");
  free(this);

  return Next;
}


/// getArgLength - Given a pointer to an expanded or unexpanded argument,
/// return the number of tokens, not counting the EOF, that make up the
/// argument.
unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
  unsigned NumArgTokens = 0;
  for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
    ++NumArgTokens;
  return NumArgTokens;
}


/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
///
const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {

  assert(Arg < getNumMacroArguments() && "Invalid arg #");
  // The unexpanded argument tokens start immediately after the MacroArgs object
  // in memory.
  const Token *Start = getTrailingObjects<Token>();
  const Token *Result = Start;

  // Scan to find Arg.  Each argument is terminated by an eof token, so skip
  // Arg eof terminators to land on the requested argument's first token.
  for (; Arg; ++Result) {
    assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
    if (Result->is(tok::eof))
      --Arg;
  }
  assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
  return Result;
}

/// Return true if this macro invocation supplied a non-empty trailing
/// variadic argument (the last formal of a variadic macro).
bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI,
                                            Preprocessor &PP) {
  if (!MI->isVariadic())
    return false;
  const int VariadicArgIndex = getNumMacroArguments() - 1;
  // An elided/empty __VA_ARGS__ pre-expands to just the eof terminator.
  return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
}

/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
/// by pre-expansion, return false.  Otherwise, conservatively return true.
bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
                                     Preprocessor &PP) const {
  // If there are no identifiers in the argument list, or if the identifiers are
  // known to not be macros, pre-expansion won't modify it.
  for (; ArgTok->isNot(tok::eof); ++ArgTok)
    if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
      if (II->hasMacroDefinition())
        // Return true even though the macro could be a function-like macro
        // without a following '(' token, or could be disabled, or not visible.
        return true;
  return false;
}

/// getPreExpArgument - Return the pre-expanded form of the specified
/// argument.  The result (including its eof terminator) is computed lazily
/// and cached in PreExpArgTokens.
const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
                                                       Preprocessor &PP) {
  assert(Arg < getNumMacroArguments() && "Invalid argument number!");

  // If we have already computed this, return it.
  if (PreExpArgTokens.size() < getNumMacroArguments())
    PreExpArgTokens.resize(getNumMacroArguments());

  std::vector<Token> &Result = PreExpArgTokens[Arg];
  if (!Result.empty()) return Result;

  // Note that we are pre-expanding macro arguments for the duration of this
  // call so nested expansion can tell.
  SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);

  const Token *AT = getUnexpArgument(Arg);
  unsigned NumToks = getArgLength(AT)+1;  // Include the EOF.

  // Otherwise, we have to pre-expand this argument, populating Result.  To do
  // this, we set up a fake TokenLexer to lex from the unexpanded argument
  // list.  With this installed, we lex expanded tokens until we hit the EOF
  // token at the end of the unexp list.
  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
                      false /*owns tokens*/, false /*is reinject*/);

  // Lex all of the macro-expanded tokens into Result.
  do {
    Result.push_back(Token());
    Token &Tok = Result.back();
    PP.Lex(Tok);
  } while (Result.back().isNot(tok::eof));

  // Pop the token stream off the top of the stack.  We know that the internal
  // pointer inside of it is to the "end" of the token stream, but the stack
  // will not otherwise be popped until the next token is lexed.  The problem is
  // that the token may be lexed sometime after the vector of tokens itself is
  // destroyed, which would be badness.
  if (PP.InCachingLexMode())
    PP.ExitCachingLexMode();
  PP.RemoveTopOfLexerStack();
  return Result;
}


/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
/// tokens into the literal string token that should be produced by the C #
/// preprocessor operator.  If Charify is true, then it should be turned into
/// a character literal for the Microsoft charize (#@) extension.
///
Token MacroArgs::StringifyArgument(const Token *ArgToks,
                                   Preprocessor &PP, bool Charify,
                                   SourceLocation ExpansionLocStart,
                                   SourceLocation ExpansionLocEnd) {
  Token Tok;
  Tok.startToken();
  Tok.setKind(Charify ? tok::char_constant : tok::string_literal);

  const Token *ArgTokStart = ArgToks;

  // Stringify all the tokens.
  SmallString<128> Result;
  Result += "\"";

  bool isFirst = true;
  for (; ArgToks->isNot(tok::eof); ++ArgToks) {
    const Token &Tok = *ArgToks;
    // Reflect inter-token whitespace as a single space.
    if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
      Result += ' ';
    isFirst = false;

    // If this is a string or character constant, escape the token as specified
    // by 6.10.3.2p2.
    if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
        Tok.is(tok::char_constant) ||          // 'x'
        Tok.is(tok::wide_char_constant) ||     // L'x'.
        Tok.is(tok::utf8_char_constant) ||     // u8'x'.
+        Tok.is(tok::utf16_char_constant) ||    // u'x'. +        Tok.is(tok::utf32_char_constant)) {    // U'x'. +      bool Invalid = false; +      std::string TokStr = PP.getSpelling(Tok, &Invalid); +      if (!Invalid) { +        std::string Str = Lexer::Stringify(TokStr); +        Result.append(Str.begin(), Str.end()); +      } +    } else if (Tok.is(tok::code_completion)) { +      PP.CodeCompleteNaturalLanguage(); +    } else { +      // Otherwise, just append the token.  Do some gymnastics to get the token +      // in place and avoid copies where possible. +      unsigned CurStrLen = Result.size(); +      Result.resize(CurStrLen+Tok.getLength()); +      const char *BufPtr = Result.data() + CurStrLen; +      bool Invalid = false; +      unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid); + +      if (!Invalid) { +        // If getSpelling returned a pointer to an already uniqued version of +        // the string instead of filling in BufPtr, memcpy it onto our string. +        if (ActualTokLen && BufPtr != &Result[CurStrLen]) +          memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); + +        // If the token was dirty, the spelling may be shorter than the token. +        if (ActualTokLen != Tok.getLength()) +          Result.resize(CurStrLen+ActualTokLen); +      } +    } +  } + +  // If the last character of the string is a \, and if it isn't escaped, this +  // is an invalid string literal, diagnose it as specified in C99. +  if (Result.back() == '\\') { +    // Count the number of consecutive \ characters.  If even, then they are +    // just escaped backslashes, otherwise it's an error. +    unsigned FirstNonSlash = Result.size()-2; +    // Guaranteed to find the starting " if nothing else. 
+    while (Result[FirstNonSlash] == '\\') +      --FirstNonSlash; +    if ((Result.size()-1-FirstNonSlash) & 1) { +      // Diagnose errors for things like: #define F(X) #X   /   F(\) +      PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); +      Result.pop_back();  // remove one of the \'s. +    } +  } +  Result += '"'; + +  // If this is the charify operation and the result is not a legal character +  // constant, diagnose it. +  if (Charify) { +    // First step, turn double quotes into single quotes: +    Result[0] = '\''; +    Result[Result.size()-1] = '\''; + +    // Check for bogus character. +    bool isBad = false; +    if (Result.size() == 3) +      isBad = Result[1] == '\'';   // ''' is not legal. '\' already fixed above. +    else +      isBad = (Result.size() != 4 || Result[1] != '\\');  // Not '\x' + +    if (isBad) { +      PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); +      Result = "' '";  // Use something arbitrary, but legal. +    } +  } + +  PP.CreateString(Result, Tok, +                  ExpansionLocStart, ExpansionLocEnd); +  return Tok; +} diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp new file mode 100644 index 000000000000..1ccd140364ae --- /dev/null +++ b/clang/lib/Lex/MacroInfo.cpp @@ -0,0 +1,248 @@ +//===- MacroInfo.cpp - Information about #defined identifiers -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MacroInfo interface. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
+using namespace clang;
+
+MacroInfo::MacroInfo(SourceLocation DefLoc)
+    : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
+      IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
+      HasCommaPasting(false), IsDisabled(false), IsUsed(false),
+      IsAllowRedefinitionsWithoutWarning(false), IsWarnIfUnused(false),
+      UsedForHeaderGuard(false) {}
+
+/// Compute (and cache) the length, in characters, of this macro's definition:
+/// from the expansion-location start of the first replacement token to the
+/// end of the last one.  Only called on the cache-miss path.
+unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const {
+  assert(!IsDefinitionLengthCached);
+  IsDefinitionLengthCached = true;
+
+  if (ReplacementTokens.empty())
+    return (DefinitionLength = 0);
+
+  const Token &firstToken = ReplacementTokens.front();
+  const Token &lastToken = ReplacementTokens.back();
+  SourceLocation macroStart = firstToken.getLocation();
+  SourceLocation macroEnd = lastToken.getLocation();
+  assert(macroStart.isValid() && macroEnd.isValid());
+  assert((macroStart.isFileID() || firstToken.is(tok::comment)) &&
+         "Macro defined in macro?");
+  assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&
+         "Macro defined in macro?");
+  std::pair<FileID, unsigned>
+      startInfo = SM.getDecomposedExpansionLoc(macroStart);
+  std::pair<FileID, unsigned>
+      endInfo = SM.getDecomposedExpansionLoc(macroEnd);
+  assert(startInfo.first == endInfo.first &&
+         "Macro definition spanning multiple FileIDs ?");
+  assert(startInfo.second <= endInfo.second);
+  DefinitionLength = endInfo.second - startInfo.second;
+  DefinitionLength += lastToken.getLength();
+
+  return DefinitionLength;
+}
+
+/// Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace.
+///
+/// \param Syntactically if true, the macro definitions can be identical even
+/// if they use different identifiers for the function macro parameters.
+/// Otherwise the comparison is lexical and this implements the rules in
+/// C99 6.10.3.
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
+                              bool Syntactically) const {
+  bool Lexically = !Syntactically;
+
+  // Check # tokens in replacement, number of args, and various flags all match.
+  if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+      getNumParams() != Other.getNumParams() ||
+      isFunctionLike() != Other.isFunctionLike() ||
+      isC99Varargs() != Other.isC99Varargs() ||
+      isGNUVarargs() != Other.isGNUVarargs())
+    return false;
+
+  if (Lexically) {
+    // Check arguments.
+    for (param_iterator I = param_begin(), OI = Other.param_begin(),
+                        E = param_end();
+         I != E; ++I, ++OI)
+      if (*I != *OI) return false;
+  }
+
+  // Check all the tokens.
+  for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+    const Token &A = ReplacementTokens[i];
+    const Token &B = Other.ReplacementTokens[i];
+    if (A.getKind() != B.getKind())
+      return false;
+
+    // If this isn't the first token, check that the whitespace and
+    // start-of-line characteristics match.
+    if (i != 0 &&
+        (A.isAtStartOfLine() != B.isAtStartOfLine() ||
+         A.hasLeadingSpace() != B.hasLeadingSpace()))
+      return false;
+
+    // If this is an identifier, it is easy.
+    if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
+      if (A.getIdentifierInfo() == B.getIdentifierInfo())
+        continue;
+      if (Lexically)
+        return false;
+      // With syntactic equivalence the parameter names can be different as long
+      // as they are used in the same place.
+      int AArgNum = getParameterNum(A.getIdentifierInfo());
+      if (AArgNum == -1)
+        return false;
+      if (AArgNum != Other.getParameterNum(B.getIdentifierInfo()))
+        return false;
+      continue;
+    }
+
+    // Otherwise, check the spelling.
+    if (PP.getSpelling(A) != PP.getSpelling(B))
+      return false;
+  }
+
+  return true;
+}
+
+LLVM_DUMP_METHOD void MacroInfo::dump() const {
+  llvm::raw_ostream &Out = llvm::errs();
+
+  // FIXME: Dump locations.
+  Out << "MacroInfo " << this;
+  if (IsBuiltinMacro) Out << " builtin";
+  if (IsDisabled) Out << " disabled";
+  if (IsUsed) Out << " used";
+  if (IsAllowRedefinitionsWithoutWarning)
+    Out << " allow_redefinitions_without_warning";
+  if (IsWarnIfUnused) Out << " warn_if_unused";
+  if (UsedForHeaderGuard) Out << " header_guard";
+
+  Out << "\n    #define <macro>";
+  if (IsFunctionLike) {
+    Out << "(";
+    for (unsigned I = 0; I != NumParameters; ++I) {
+      if (I) Out << ", ";
+      Out << ParameterList[I]->getName();
+    }
+    if (IsC99Varargs || IsGNUVarargs) {
+      if (NumParameters && IsC99Varargs) Out << ", ";
+      Out << "...";
+    }
+    Out << ")";
+  }
+
+  bool First = true;
+  for (const Token &Tok : ReplacementTokens) {
+    // Leading space is semantically meaningful in a macro definition,
+    // so preserve it in the dump output.
+    if (First || Tok.hasLeadingSpace())
+      Out << " ";
+    First = false;
+
+    if (const char *Punc = tok::getPunctuatorSpelling(Tok.getKind()))
+      Out << Punc;
+    else if (Tok.isLiteral() && Tok.getLiteralData())
+      Out << StringRef(Tok.getLiteralData(), Tok.getLength());
+    else if (auto *II = Tok.getIdentifierInfo())
+      Out << II->getName();
+    else
+      Out << Tok.getName();
+  }
+}
+
+/// Walk backwards through the directive chain to find the currently-active
+/// definition, recording any intervening #undef location and the most recent
+/// visibility directive along the way.
+MacroDirective::DefInfo MacroDirective::getDefinition() {
+  MacroDirective *MD = this;
+  SourceLocation UndefLoc;
+  Optional<bool> isPublic;
+  for (; MD; MD = MD->getPrevious()) {
+    if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+      return DefInfo(DefMD, UndefLoc,
+                     !isPublic.hasValue() || isPublic.getValue());
+
+    if (UndefMacroDirective *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
+      UndefLoc = UndefMD->getLocation();
+      continue;
+    }
+
+    VisibilityMacroDirective *VisMD = cast<VisibilityMacroDirective>(MD);
+    if (!isPublic.hasValue())
+      isPublic = VisMD->isPublic();
+  }
+
+  return DefInfo(nullptr, UndefLoc,
+                 !isPublic.hasValue() || isPublic.getValue());
+}
+
+const MacroDirective::DefInfo
+MacroDirective::findDirectiveAtLoc(SourceLocation L,
+                                   const SourceManager &SM) const {
+  assert(L.isValid() && "SourceLocation is invalid.");
+  for (DefInfo Def = getDefinition(); Def; Def = Def.getPreviousDefinition()) {
+    if (Def.getLocation().isInvalid() ||  // For macros defined on the command line.
+        SM.isBeforeInTranslationUnit(Def.getLocation(), L))
+      return (!Def.isUndefined() ||
+              SM.isBeforeInTranslationUnit(L, Def.getUndefLocation()))
+                  ? Def : DefInfo();
+  }
+  return DefInfo();
+}
+
+LLVM_DUMP_METHOD void MacroDirective::dump() const {
+  llvm::raw_ostream &Out = llvm::errs();
+
+  switch (getKind()) {
+  case MD_Define: Out << "DefMacroDirective"; break;
+  case MD_Undefine: Out << "UndefMacroDirective"; break;
+  case MD_Visibility: Out << "VisibilityMacroDirective"; break;
+  }
+  Out << " " << this;
+  // FIXME: Dump SourceLocation.
+  if (auto *Prev = getPrevious())
+    Out << " prev " << Prev;
+  if (IsFromPCH) Out << " from_pch";
+
+  if (isa<VisibilityMacroDirective>(this))
+    Out << (IsPublic ? " public" : " private");
+
+  if (auto *DMD = dyn_cast<DefMacroDirective>(this)) {
+    if (auto *Info = DMD->getInfo()) {
+      Out << "\n  ";
+      Info->dump();
+    }
+  }
+  Out << "\n";
+}
+
+/// Allocate a ModuleMacro (with its trailing array of override pointers) from
+/// the preprocessor's allocator; its lifetime is tied to the Preprocessor.
+ModuleMacro *ModuleMacro::create(Preprocessor &PP, Module *OwningModule,
+                                 IdentifierInfo *II, MacroInfo *Macro,
+                                 ArrayRef<ModuleMacro *> Overrides) {
+  void *Mem = PP.getPreprocessorAllocator().Allocate(
+      sizeof(ModuleMacro) + sizeof(ModuleMacro *) * Overrides.size(),
+      alignof(ModuleMacro));
+  return new (Mem) ModuleMacro(OwningModule, II, Macro, Overrides);
+}
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
new file mode 100644
index 000000000000..db59629997ee
--- /dev/null
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -0,0 +1,3010 @@
+//===- ModuleMap.cpp - Describe the layout of modules ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ModuleMap implementation, which describes the layout
+// of a module as it relates to headers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ModuleMap.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/Module.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <system_error>
+#include <utility>
+
+using namespace clang;
+
+void ModuleMapCallbacks::anchor() {}
+
+/// Mark every module whose link name was pending on \p Mod's name as now
+/// using its export_as name for linking.
+void ModuleMap::resolveLinkAsDependencies(Module *Mod) {
+  auto PendingLinkAs = PendingLinkAsModule.find(Mod->Name);
+  if (PendingLinkAs != PendingLinkAsModule.end()) {
+    for (auto &Name : PendingLinkAs->second) {
+      auto *M = findModule(Name.getKey());
+      if (M)
+        M->UseExportAsModuleLinkName = true;
+    }
+  }
+}
+
+void ModuleMap::addLinkAsDependency(Module *Mod) {
+  if (findModule(Mod->ExportAsModule))
+    Mod->UseExportAsModuleLinkName = true;
+  else
+    // Target module not parsed yet; record it to resolve later.
+    PendingLinkAsModule[Mod->ExportAsModule].insert(Mod->Name);
+}
+
+/// Map a header-role bitmask onto the corresponding Module::HeaderKind.
+Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {
+  switch ((int)Role) {
+  default: llvm_unreachable("unknown header role");
+  case NormalHeader:
+    return Module::HK_Normal;
+  case PrivateHeader:
+    return Module::HK_Private;
+  case TextualHeader:
+    return Module::HK_Textual;
+  case PrivateHeader | TextualHeader:
+    return Module::HK_PrivateTextual;
+  }
+}
+
+/// Inverse of headerRoleToKind; HK_Excluded has no role representation.
+ModuleMap::ModuleHeaderRole
+ModuleMap::headerKindToRole(Module::HeaderKind Kind) {
+  switch ((int)Kind) {
+  case Module::HK_Normal:
+    return NormalHeader;
+  case Module::HK_Private:
+    return PrivateHeader;
+  case Module::HK_Textual:
+    return TextualHeader;
+  case Module::HK_PrivateTextual:
+    return ModuleHeaderRole(PrivateHeader | TextualHeader);
+  case Module::HK_Excluded:
+    llvm_unreachable("unexpected header kind");
+  }
+  llvm_unreachable("unknown header kind");
+}
+
+Module::ExportDecl
+ModuleMap::resolveExport(Module *Mod,
+                         const Module::UnresolvedExportDecl &Unresolved,
+                         bool Complain) const {
+  // We may have just a wildcard.
+  if (Unresolved.Id.empty()) {
+    assert(Unresolved.Wildcard && "Invalid unresolved export");
+    return Module::ExportDecl(nullptr, true);
+  }
+
+  // Resolve the module-id.
+  Module *Context = resolveModuleId(Unresolved.Id, Mod, Complain);
+  if (!Context)
+    return {};
+
+  return Module::ExportDecl(Context, Unresolved.Wildcard);
+}
+
+Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod,
+                                   bool Complain) const {
+  // Find the starting module.
+  Module *Context = lookupModuleUnqualified(Id[0].first, Mod);
+  if (!Context) {
+    if (Complain)
+      Diags.Report(Id[0].second, diag::err_mmap_missing_module_unqualified)
+      << Id[0].first << Mod->getFullModuleName();
+
+    return nullptr;
+  }
+
+  // Dig into the module path.
+  for (unsigned I = 1, N = Id.size(); I != N; ++I) {
+    Module *Sub = lookupModuleQualified(Id[I].first, Context);
+    if (!Sub) {
+      if (Complain)
+        Diags.Report(Id[I].second, diag::err_mmap_missing_module_qualified)
+        << Id[I].first << Context->getFullModuleName()
+        << SourceRange(Id[0].second, Id[I-1].second);
+
+      return nullptr;
+    }
+
+    Context = Sub;
+  }
+
+  return Context;
+}
+
+/// Append to \p Paths the set of paths needed to get to the
+/// subframework in which the given module lives.
+static void appendSubframeworkPaths(Module *Mod,
+                                    SmallVectorImpl<char> &Path) {
+  // Collect the framework names from the given module to the top-level module.
+  SmallVector<StringRef, 2> Paths;
+  for (; Mod; Mod = Mod->Parent) {
+    if (Mod->IsFramework)
+      Paths.push_back(Mod->Name);
+  }
+
+  if (Paths.empty())
+    return;
+
+  // Add Frameworks/Name.framework for each subframework.
+  for (unsigned I = Paths.size() - 1; I != 0; --I)
+    llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework");
+}
+
+/// Look up the on-disk file for a header declared in a module map, trying
+/// framework Headers/ and PrivateHeaders/ locations when the module is part
+/// of a framework.  Returns null when the file is missing or its size/mtime
+/// doesn't match the directive's expectations; sets \p NeedsFramework when a
+/// non-framework module's header is only found via framework-style paths.
+const FileEntry *ModuleMap::findHeader(
+    Module *M, const Module::UnresolvedHeaderDirective &Header,
+    SmallVectorImpl<char> &RelativePathName, bool &NeedsFramework) {
+  // Search for the header file within the module's home directory.
+  auto *Directory = M->Directory;
+  SmallString<128> FullPathName(Directory->getName());
+
+  auto GetFile = [&](StringRef Filename) -> const FileEntry * {
+    auto File = SourceMgr.getFileManager().getFile(Filename);
+    if (!File ||
+        (Header.Size && (*File)->getSize() != *Header.Size) ||
+        (Header.ModTime && (*File)->getModificationTime() != *Header.ModTime))
+      return nullptr;
+    return *File;
+  };
+
+  auto GetFrameworkFile = [&]() -> const FileEntry * {
+    unsigned FullPathLength = FullPathName.size();
+    appendSubframeworkPaths(M, RelativePathName);
+    unsigned RelativePathLength = RelativePathName.size();
+
+    // Check whether this file is in the public headers.
+    llvm::sys::path::append(RelativePathName, "Headers", Header.FileName);
+    llvm::sys::path::append(FullPathName, RelativePathName);
+    if (auto *File = GetFile(FullPathName))
+      return File;
+
+    // Check whether this file is in the private headers.
+    // Ideally, private modules in the form 'FrameworkName.Private' should
+    // be defined as 'module FrameworkName.Private', and not as
+    // 'framework module FrameworkName.Private', since a 'Private.Framework'
+    // does not usually exist. However, since both are currently widely used
+    // for private modules, make sure we find the right path in both cases.
+    if (M->IsFramework && M->Name == "Private")
+      RelativePathName.clear();
+    else
+      RelativePathName.resize(RelativePathLength);
+    FullPathName.resize(FullPathLength);
+    llvm::sys::path::append(RelativePathName, "PrivateHeaders",
+                            Header.FileName);
+    llvm::sys::path::append(FullPathName, RelativePathName);
+    return GetFile(FullPathName);
+  };
+
+  if (llvm::sys::path::is_absolute(Header.FileName)) {
+    RelativePathName.clear();
+    RelativePathName.append(Header.FileName.begin(), Header.FileName.end());
+    return GetFile(Header.FileName);
+  }
+
+  if (M->isPartOfFramework())
+    return GetFrameworkFile();
+
+  // Lookup for normal headers.
+  llvm::sys::path::append(RelativePathName, Header.FileName);
+  llvm::sys::path::append(FullPathName, RelativePathName);
+  auto *NormalHdrFile = GetFile(FullPathName);
+
+  if (M && !NormalHdrFile && Directory->getName().endswith(".framework")) {
+    // The lack of 'framework' keyword in a module declaration is a simple
+    // mistake we can diagnose when the header exists within the proper
+    // framework style path.
+    FullPathName.assign(Directory->getName());
+    RelativePathName.clear();
+    if (GetFrameworkFile()) {
+      Diags.Report(Header.FileNameLoc,
+                   diag::warn_mmap_incomplete_framework_module_declaration)
+          << Header.FileName << M->getFullModuleName();
+      NeedsFramework = true;
+    }
+    return nullptr;
+  }
+
+  return NormalHdrFile;
+}
+
+void ModuleMap::resolveHeader(Module *Mod,
+                              const Module::UnresolvedHeaderDirective &Header,
+                              bool &NeedsFramework) {
+  SmallString<128> RelativePathName;
+  if (const FileEntry *File =
+          findHeader(Mod, Header, RelativePathName, NeedsFramework)) {
+    if (Header.IsUmbrella) {
+      const DirectoryEntry *UmbrellaDir = File->getDir();
+      if (Module *UmbrellaMod = UmbrellaDirs[UmbrellaDir])
+        Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
+          << UmbrellaMod->getFullModuleName();
+      else
+        // Record this umbrella header.
+        setUmbrellaHeader(Mod, File, RelativePathName.str());
+    } else {
+      Module::Header H = {RelativePathName.str(), File};
+      if (Header.Kind == Module::HK_Excluded)
+        excludeHeader(Mod, H);
+      else
+        addHeader(Mod, H, headerKindToRole(Header.Kind));
+    }
+  } else if (Header.HasBuiltinHeader && !Header.Size && !Header.ModTime) {
+    // There's a builtin header but no corresponding on-disk header. Assume
+    // this was supposed to modularize the builtin header alone.
+  } else if (Header.Kind == Module::HK_Excluded) {
+    // Ignore missing excluded header files. They're optional anyway.
+  } else {
+    // If we find a module that has a missing header, we mark this module as
+    // unavailable and store the header directive for displaying diagnostics.
+    Mod->MissingHeaders.push_back(Header);
+    // A missing header with stat information doesn't make the module
+    // unavailable; this keeps our behavior consistent as headers are lazily
+    // resolved. (Such a module still can't be built though, except from
+    // preprocessed source.)
+    if (!Header.Size && !Header.ModTime)
+      Mod->markUnavailable();
+  }
+}
+
+bool ModuleMap::resolveAsBuiltinHeader(
+    Module *Mod, const Module::UnresolvedHeaderDirective &Header) {
+  if (Header.Kind == Module::HK_Excluded ||
+      llvm::sys::path::is_absolute(Header.FileName) ||
+      Mod->isPartOfFramework() || !Mod->IsSystem || Header.IsUmbrella ||
+      !BuiltinIncludeDir || BuiltinIncludeDir == Mod->Directory ||
+      !isBuiltinHeader(Header.FileName))
+    return false;
+
+  // This is a system module with a top-level header. This header
+  // may have a counterpart (or replacement) in the set of headers
+  // supplied by Clang. Find that builtin header.
+  SmallString<128> Path;
+  llvm::sys::path::append(Path, BuiltinIncludeDir->getName(), Header.FileName);
+  auto File = SourceMgr.getFileManager().getFile(Path);
+  if (!File)
+    return false;
+
+  auto Role = headerKindToRole(Header.Kind);
+  Module::Header H = {Path.str(), *File};
+  addHeader(Mod, H, Role);
+  return true;
+}
+
+ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+                     const LangOptions &LangOpts, const TargetInfo *Target,
+                     HeaderSearch &HeaderInfo)
+    : SourceMgr(SourceMgr), Diags(Diags), LangOpts(LangOpts), Target(Target),
+      HeaderInfo(HeaderInfo) {
+  MMapLangOpts.LineComment = true;
+}
+
+ModuleMap::~ModuleMap() {
+  for (auto &M : Modules)
+    delete M.getValue();
+  for (auto *M : ShadowModules)
+    delete M;
+}
+
+void ModuleMap::setTarget(const TargetInfo &Target) {
+  assert((!this->Target || this->Target == &Target) &&
+         "Improper target override");
+  this->Target = &Target;
+}
+
+/// 
"Sanitize" a filename so that it can be used as an identifier. +static StringRef sanitizeFilenameAsIdentifier(StringRef Name, +                                              SmallVectorImpl<char> &Buffer) { +  if (Name.empty()) +    return Name; + +  if (!isValidIdentifier(Name)) { +    // If we don't already have something with the form of an identifier, +    // create a buffer with the sanitized name. +    Buffer.clear(); +    if (isDigit(Name[0])) +      Buffer.push_back('_'); +    Buffer.reserve(Buffer.size() + Name.size()); +    for (unsigned I = 0, N = Name.size(); I != N; ++I) { +      if (isIdentifierBody(Name[I])) +        Buffer.push_back(Name[I]); +      else +        Buffer.push_back('_'); +    } + +    Name = StringRef(Buffer.data(), Buffer.size()); +  } + +  while (llvm::StringSwitch<bool>(Name) +#define KEYWORD(Keyword,Conditions) .Case(#Keyword, true) +#define ALIAS(Keyword, AliasOf, Conditions) .Case(Keyword, true) +#include "clang/Basic/TokenKinds.def" +           .Default(false)) { +    if (Name.data() != Buffer.data()) +      Buffer.append(Name.begin(), Name.end()); +    Buffer.push_back('_'); +    Name = StringRef(Buffer.data(), Buffer.size()); +  } + +  return Name; +} + +/// Determine whether the given file name is the name of a builtin +/// header, supplied by Clang to replace, override, or augment existing system +/// headers. 
+bool ModuleMap::isBuiltinHeader(StringRef FileName) { +  return llvm::StringSwitch<bool>(FileName) +           .Case("float.h", true) +           .Case("iso646.h", true) +           .Case("limits.h", true) +           .Case("stdalign.h", true) +           .Case("stdarg.h", true) +           .Case("stdatomic.h", true) +           .Case("stdbool.h", true) +           .Case("stddef.h", true) +           .Case("stdint.h", true) +           .Case("tgmath.h", true) +           .Case("unwind.h", true) +           .Default(false); +} + +ModuleMap::HeadersMap::iterator +ModuleMap::findKnownHeader(const FileEntry *File) { +  resolveHeaderDirectives(File); +  HeadersMap::iterator Known = Headers.find(File); +  if (HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps && +      Known == Headers.end() && File->getDir() == BuiltinIncludeDir && +      ModuleMap::isBuiltinHeader(llvm::sys::path::filename(File->getName()))) { +    HeaderInfo.loadTopLevelSystemModules(); +    return Headers.find(File); +  } +  return Known; +} + +ModuleMap::KnownHeader +ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File, +                    SmallVectorImpl<const DirectoryEntry *> &IntermediateDirs) { +  if (UmbrellaDirs.empty()) +    return {}; + +  const DirectoryEntry *Dir = File->getDir(); +  assert(Dir && "file in no directory"); + +  // Note: as an egregious but useful hack we use the real path here, because +  // frameworks moving from top-level frameworks to embedded frameworks tend +  // to be symlinked from the top-level location to the embedded location, +  // and we need to resolve lookups as if we had found the embedded location. +  StringRef DirName = SourceMgr.getFileManager().getCanonicalName(Dir); + +  // Keep walking up the directory hierarchy, looking for a directory with +  // an umbrella header. 
+  do {
+    auto KnownDir = UmbrellaDirs.find(Dir);
+    if (KnownDir != UmbrellaDirs.end())
+      return KnownHeader(KnownDir->second, NormalHeader);
+
+    IntermediateDirs.push_back(Dir);
+
+    // Retrieve our parent path.
+    DirName = llvm::sys::path::parent_path(DirName);
+    if (DirName.empty())
+      break;
+
+    // Resolve the parent path to a directory entry.
+    if (auto DirEntry = SourceMgr.getFileManager().getDirectory(DirName))
+      Dir = *DirEntry;
+    else
+      Dir = nullptr;
+  } while (Dir);
+  return {};
+}
+
+/// Returns true if \p RequestingModule is not allowed to include
+/// \p IncFileEnt given its known role \p Header (i.e. the header is
+/// private to a module the requester cannot see into).
+static bool violatesPrivateInclude(Module *RequestingModule,
+                                   const FileEntry *IncFileEnt,
+                                   ModuleMap::KnownHeader Header) {
+#ifndef NDEBUG
+  if (Header.getRole() & ModuleMap::PrivateHeader) {
+    // Check for consistency between the module header role
+    // as obtained from the lookup and as obtained from the module.
+    // This check is not cheap, so enable it only for debugging.
+    bool IsPrivate = false;
+    SmallVectorImpl<Module::Header> *HeaderList[] = {
+        &Header.getModule()->Headers[Module::HK_Private],
+        &Header.getModule()->Headers[Module::HK_PrivateTextual]};
+    for (auto *Hs : HeaderList)
+      IsPrivate |=
+          std::find_if(Hs->begin(), Hs->end(), [&](const Module::Header &H) {
+            return H.Entry == IncFileEnt;
+          }) != Hs->end();
+    assert(IsPrivate && "inconsistent headers and roles");
+  }
+#endif
+  return !Header.isAccessibleFrom(RequestingModule);
+}
+
+/// Map a (possibly null) module to its top-level module; null stays null.
+static Module *getTopLevelOrNull(Module *M) {
+  return M ? M->getTopLevelModule() : nullptr;
+}
+
+void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
+                                        bool RequestingModuleIsModuleInterface,
+                                        SourceLocation FilenameLoc,
+                                        StringRef Filename,
+                                        const FileEntry *File) {
+  // No errors for indirect modules. This may be a bit of a problem for modules
+  // with no source files.
+  if (getTopLevelOrNull(RequestingModule) != getTopLevelOrNull(SourceModule))
+    return;
+
+  if (RequestingModule) {
+    resolveUses(RequestingModule, /*Complain=*/false);
+    resolveHeaderDirectives(RequestingModule);
+  }
+
+  // Record the most interesting failure mode seen while scanning the known
+  // owners of this header, so we can emit the best diagnostic afterwards.
+  bool Excluded = false;
+  Module *Private = nullptr;
+  Module *NotUsed = nullptr;
+
+  HeadersMap::iterator Known = findKnownHeader(File);
+  if (Known != Headers.end()) {
+    for (const KnownHeader &Header : Known->second) {
+      // Remember private headers for later printing of a diagnostic.
+      if (violatesPrivateInclude(RequestingModule, File, Header)) {
+        Private = Header.getModule();
+        continue;
+      }
+
+      // If uses need to be specified explicitly, we are only allowed to return
+      // modules that are explicitly used by the requesting module.
+      if (RequestingModule && LangOpts.ModulesDeclUse &&
+          !RequestingModule->directlyUses(Header.getModule())) {
+        NotUsed = Header.getModule();
+        continue;
+      }
+
+      // We have found a module that we can happily use.
+      return;
+    }
+
+    Excluded = true;
+  }
+
+  // We have found a header, but it is private.
+  if (Private) {
+    Diags.Report(FilenameLoc, diag::warn_use_of_private_header_outside_module)
+        << Filename;
+    return;
+  }
+
+  // We have found a module, but we don't use it.
+  if (NotUsed) {
+    Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
+        << RequestingModule->getTopLevelModule()->Name << Filename;
+    return;
+  }
+
+  if (Excluded || isHeaderInUmbrellaDirs(File))
+    return;
+
+  // At this point, only non-modular includes remain.
+
+  if (RequestingModule && LangOpts.ModulesStrictDeclUse) {
+    Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
+        << RequestingModule->getTopLevelModule()->Name << Filename;
+  } else if (RequestingModule && RequestingModuleIsModuleInterface &&
+             LangOpts.isCompilingModule()) {
+    // Do not diagnose when we are not compiling a module.
+    diag::kind DiagID = RequestingModule->getTopLevelModule()->IsFramework ?
+        diag::warn_non_modular_include_in_framework_module :
+        diag::warn_non_modular_include_in_module;
+    Diags.Report(FilenameLoc, DiagID) << RequestingModule->getFullModuleName()
+        << File->getName();
+  }
+}
+
+/// Ordering heuristic used to pick the best of several modules that all
+/// claim ownership of the same header.
+static bool isBetterKnownHeader(const ModuleMap::KnownHeader &New,
+                                const ModuleMap::KnownHeader &Old) {
+  // Prefer available modules.
+  if (New.getModule()->isAvailable() && !Old.getModule()->isAvailable())
+    return true;
+
+  // Prefer a public header over a private header.
+  if ((New.getRole() & ModuleMap::PrivateHeader) !=
+      (Old.getRole() & ModuleMap::PrivateHeader))
+    return !(New.getRole() & ModuleMap::PrivateHeader);
+
+  // Prefer a non-textual header over a textual header.
+  if ((New.getRole() & ModuleMap::TextualHeader) !=
+      (Old.getRole() & ModuleMap::TextualHeader))
+    return !(New.getRole() & ModuleMap::TextualHeader);
+
+  // Don't have a reason to choose between these. Just keep the first one.
+  return false;
+}
+
+ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File,
+                                                      bool AllowTextual) {
+  // Filter applied to every candidate result: drop textual headers unless
+  // the caller explicitly allowed them.
+  auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {
+    if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader)
+      return {};
+    return R;
+  };
+
+  HeadersMap::iterator Known = findKnownHeader(File);
+  if (Known != Headers.end()) {
+    ModuleMap::KnownHeader Result;
+    // Iterate over all modules that 'File' is part of to find the best fit.
+    for (KnownHeader &H : Known->second) {
+      // Prefer a header from the source module over all others.
+      if (H.getModule()->getTopLevelModule() == SourceModule)
+        return MakeResult(H);
+      if (!Result || isBetterKnownHeader(H, Result))
+        Result = H;
+    }
+    return MakeResult(Result);
+  }
+
+  return MakeResult(findOrCreateModuleForHeaderInUmbrellaDir(File));
+}
+
+/// Infer a module (and any intermediate submodules) for a header that lives
+/// beneath some module's umbrella directory; returns an empty KnownHeader if
+/// no umbrella directory covers \p File.
+ModuleMap::KnownHeader
+ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) {
+  assert(!Headers.count(File) && "already have a module for this header");
+
+  SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+  KnownHeader H = findHeaderInUmbrellaDirs(File, SkippedDirs);
+  if (H) {
+    Module *Result = H.getModule();
+
+    // Search up the module stack until we find a module with an umbrella
+    // directory.
+    Module *UmbrellaModule = Result;
+    while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent)
+      UmbrellaModule = UmbrellaModule->Parent;
+
+    if (UmbrellaModule->InferSubmodules) {
+      const FileEntry *UmbrellaModuleMap =
+          getModuleMapFileForUniquing(UmbrellaModule);
+
+      // Infer submodules for each of the directories we found between
+      // the directory of the umbrella header and the directory where
+      // the actual header is located.
+      bool Explicit = UmbrellaModule->InferExplicitSubmodules;
+
+      // SkippedDirs is ordered innermost-first, so walk it backwards to
+      // build the submodule chain from the umbrella directory downward.
+      for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+        // Find or create the module that corresponds to this directory name.
+        SmallString<32> NameBuf;
+        StringRef Name = sanitizeFilenameAsIdentifier(
+            llvm::sys::path::stem(SkippedDirs[I-1]->getName()), NameBuf);
+        Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
+                                    Explicit).first;
+        InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
+        Result->IsInferred = true;
+
+        // Associate the module and the directory.
+        UmbrellaDirs[SkippedDirs[I-1]] = Result;
+
+        // If inferred submodules export everything they import, add a
+        // wildcard to the set of exports.
+        if (UmbrellaModule->InferExportWildcard && Result->Exports.empty())
+          Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+      }
+
+      // Infer a submodule with the same name as this header file.
+      SmallString<32> NameBuf;
+      StringRef Name = sanitizeFilenameAsIdentifier(
+                         llvm::sys::path::stem(File->getName()), NameBuf);
+      Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
+                                  Explicit).first;
+      InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
+      Result->IsInferred = true;
+      Result->addTopHeader(File);
+
+      // If inferred submodules export everything they import, add a
+      // wildcard to the set of exports.
+      if (UmbrellaModule->InferExportWildcard && Result->Exports.empty())
+        Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+    } else {
+      // Record each of the directories we stepped through as being part of
+      // the module we found, since the umbrella header covers them all.
+      for (unsigned I = 0, N = SkippedDirs.size(); I != N; ++I)
+        UmbrellaDirs[SkippedDirs[I]] = Result;
+    }
+
+    KnownHeader Header(Result, NormalHeader);
+    Headers[File].push_back(Header);
+    return Header;
+  }
+
+  return {};
+}
+
+ArrayRef<ModuleMap::KnownHeader>
+ModuleMap::findAllModulesForHeader(const FileEntry *File) const {
+  resolveHeaderDirectives(File);
+  auto It = Headers.find(File);
+  if (It == Headers.end())
+    return None;
+  return It->second;
+}
+
+bool ModuleMap::isHeaderInUnavailableModule(const FileEntry *Header) const {
+  return isHeaderUnavailableInModule(Header, nullptr);
+}
+
+bool
+ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header,
+                                       const Module *RequestingModule) const {
+  resolveHeaderDirectives(Header);
+  HeadersMap::const_iterator Known = Headers.find(Header);
+  if (Known != Headers.end()) {
+    for (SmallVectorImpl<KnownHeader>::const_iterator
+             I = Known->second.begin(),
+             E = Known->second.end();
+         I != E; ++I) {
+
+      if (I->isAvailable() &&
+          (!RequestingModule ||
+           I->getModule()->isSubModuleOf(RequestingModule))) {
+        // When no requesting module is available, the caller is looking if a
+        // header is part a module by only looking into the module map. This is
+        // done by warn_uncovered_module_header checks; don't consider textual
+        // headers part of it in this mode, otherwise we get misleading warnings
+        // that a umbrella header is not including a textual header.
+        if (!RequestingModule && I->getRole() == ModuleMap::TextualHeader)
+          continue;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  const DirectoryEntry *Dir = Header->getDir();
+  SmallVector<const DirectoryEntry *, 2> SkippedDirs;
+  StringRef DirName = Dir->getName();
+
+  // A module counts as unavailable only if it is relevant to the requester
+  // (or if there is no requester at all).
+  auto IsUnavailable = [&](const Module *M) {
+    return !M->isAvailable() && (!RequestingModule ||
+                                 M->isSubModuleOf(RequestingModule));
+  };
+
+  // Keep walking up the directory hierarchy, looking for a directory with
+  // an umbrella header.
+  do {
+    llvm::DenseMap<const DirectoryEntry *, Module *>::const_iterator KnownDir
+      = UmbrellaDirs.find(Dir);
+    if (KnownDir != UmbrellaDirs.end()) {
+      Module *Found = KnownDir->second;
+      if (IsUnavailable(Found))
+        return true;
+
+      // Search up the module stack until we find a module with an umbrella
+      // directory.
+      Module *UmbrellaModule = Found;
+      while (!UmbrellaModule->getUmbrellaDir() && UmbrellaModule->Parent)
+        UmbrellaModule = UmbrellaModule->Parent;
+
+      if (UmbrellaModule->InferSubmodules) {
+        for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+          // Find or create the module that corresponds to this directory name.
+          SmallString<32> NameBuf;
+          StringRef Name = sanitizeFilenameAsIdentifier(
+                             llvm::sys::path::stem(SkippedDirs[I-1]->getName()),
+                             NameBuf);
+          Found = lookupModuleQualified(Name, Found);
+          if (!Found)
+            return false;
+          if (IsUnavailable(Found))
+            return true;
+        }
+
+        // Infer a submodule with the same name as this header file.
+        SmallString<32> NameBuf;
+        StringRef Name = sanitizeFilenameAsIdentifier(
+                           llvm::sys::path::stem(Header->getName()),
+                           NameBuf);
+        Found = lookupModuleQualified(Name, Found);
+        if (!Found)
+          return false;
+      }
+
+      return IsUnavailable(Found);
+    }
+
+    SkippedDirs.push_back(Dir);
+
+    // Retrieve our parent path.
+    DirName = llvm::sys::path::parent_path(DirName);
+    if (DirName.empty())
+      break;
+
+    // Resolve the parent path to a directory entry.
+    if (auto DirEntry = SourceMgr.getFileManager().getDirectory(DirName))
+      Dir = *DirEntry;
+    else
+      Dir = nullptr;
+  } while (Dir);
+
+  return false;
+}
+
+/// Look up a top-level module by name; returns null if it is unknown.
+Module *ModuleMap::findModule(StringRef Name) const {
+  llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name);
+  if (Known != Modules.end())
+    return Known->getValue();
+
+  return nullptr;
+}
+
+Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
+                                           Module *Context) const {
+  // Search the context and each of its ancestors before falling back to the
+  // top-level module namespace.
+  for(; Context; Context = Context->Parent) {
+    if (Module *Sub = lookupModuleQualified(Name, Context))
+      return Sub;
+  }
+
+  return findModule(Name);
+}
+
+Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
+  if (!Context)
+    return findModule(Name);
+
+  return Context->findSubmodule(Name);
+}
+
+std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
+                                                        Module *Parent,
+                                                        bool IsFramework,
+                                                        bool IsExplicit) {
+  // Try to find an existing module with this name.
+  if (Module *Sub = lookupModuleQualified(Name, Parent))
+    return std::make_pair(Sub, false);
+
+  // Create a new module with this name.
+  Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework,
+                              IsExplicit, NumCreatedModules++);
+  if (!Parent) {
+    // Top-level modules are registered in the global table and tagged with
+    // the current module-map scope.
+    if (LangOpts.CurrentModule == Name)
+      SourceModule = Result;
+    Modules[Name] = Result;
+    ModuleScopeIDs[Result] = CurrentModuleScopeID;
+  }
+  return std::make_pair(Result, true);
+}
+
+Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc) {
+  // The fragment is parked in PendingSubmodules until a module interface
+  // unit adopts it (see createModuleForInterfaceUnit).
+  PendingSubmodules.emplace_back(
+      new Module("<global>", Loc, nullptr, /*IsFramework*/ false,
+                 /*IsExplicit*/ true, NumCreatedModules++));
+  PendingSubmodules.back()->Kind = Module::GlobalModuleFragment;
+  return PendingSubmodules.back().get();
+}
+
+Module *
+ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent,
+                                                       SourceLocation Loc) {
+  auto *Result =
+      new Module("<private>", Loc, Parent, /*IsFramework*/ false,
+                 /*IsExplicit*/ true, NumCreatedModules++);
+  Result->Kind = Module::PrivateModuleFragment;
+  return Result;
+}
+
+Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc,
+                                                StringRef Name,
+                                                Module *GlobalModule) {
+  assert(LangOpts.CurrentModule == Name && "module name mismatch");
+  assert(!Modules[Name] && "redefining existing module");
+
+  auto *Result =
+      new Module(Name, Loc, nullptr, /*IsFramework*/ false,
+                 /*IsExplicit*/ false, NumCreatedModules++);
+  Result->Kind = Module::ModuleInterfaceUnit;
+  Modules[Name] = SourceModule = Result;
+
+  // Reparent the current global module fragment as a submodule of this module.
+  for (auto &Submodule : PendingSubmodules) {
+    Submodule->setParent(Result);
+    Submodule.release(); // now owned by parent
+  }
+  PendingSubmodules.clear();
+
+  // Mark the main source file as being within the newly-created module so that
+  // declarations and macros are properly visibility-restricted to it.
+  auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
+  assert(MainFile && "no input file for module interface");
+  Headers[MainFile].push_back(KnownHeader(Result, PrivateHeader));
+
+  return Result;
+}
+
+/// Create the module for a collection of headers; each named header becomes
+/// an explicit submodule of the result, and each submodule re-exports
+/// everything it imports.
+Module *ModuleMap::createHeaderModule(StringRef Name,
+                                      ArrayRef<Module::Header> Headers) {
+  assert(LangOpts.CurrentModule == Name && "module name mismatch");
+  assert(!Modules[Name] && "redefining existing module");
+
+  auto *Result =
+      new Module(Name, SourceLocation(), nullptr, /*IsFramework*/ false,
+                 /*IsExplicit*/ false, NumCreatedModules++);
+  Result->Kind = Module::ModuleInterfaceUnit;
+  Modules[Name] = SourceModule = Result;
+
+  for (const Module::Header &H : Headers) {
+    auto *M = new Module(H.NameAsWritten, SourceLocation(), Result,
+                         /*IsFramework*/ false,
+                         /*IsExplicit*/ true, NumCreatedModules++);
+    // Header modules are implicitly 'export *'.
+    M->Exports.push_back(Module::ExportDecl(nullptr, true));
+    addHeader(M, H, NormalHeader);
+  }
+
+  return Result;
+}
+
+/// For a framework module, infer the framework against which we
+/// should link.
+static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir,
+                               FileManager &FileMgr) {
+  assert(Mod->IsFramework && "Can only infer linking for framework modules");
+  assert(!Mod->isSubFramework() &&
+         "Can only infer linking for top-level frameworks");
+
+  SmallString<128> LibName;
+  LibName += FrameworkDir->getName();
+  llvm::sys::path::append(LibName, Mod->Name);
+
+  // The library name of a framework has more than one possible extension since
+  // the introduction of the text-based dynamic library format. We need to check
+  // for both before we give up.
+  for (const char *extension : {"", ".tbd"}) {
+    llvm::sys::path::replace_extension(LibName, extension);
+    if (FileMgr.getFile(LibName)) {
+      Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name,
+                                                       /*IsFramework=*/true));
+      return;
+    }
+  }
+}
+
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
+                                        bool IsSystem, Module *Parent) {
+  Attributes Attrs;
+  Attrs.IsSystem = IsSystem;
+  return inferFrameworkModule(FrameworkDir, Attrs, Parent);
+}
+
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
+                                        Attributes Attrs, Module *Parent) {
+  // Note: as an egregious but useful hack we use the real path here, because
+  // we might be looking at an embedded framework that symlinks out to a
+  // top-level framework, and we need to infer as if we were naming the
+  // top-level framework.
+  StringRef FrameworkDirName =
+      SourceMgr.getFileManager().getCanonicalName(FrameworkDir);
+
+  // In case this is a case-insensitive filesystem, use the canonical
+  // directory name as the ModuleName, since modules are case-sensitive.
+  // FIXME: we should be able to give a fix-it hint for the correct spelling.
+  SmallString<32> ModuleNameStorage;
+  StringRef ModuleName = sanitizeFilenameAsIdentifier(
+      llvm::sys::path::stem(FrameworkDirName), ModuleNameStorage);
+
+  // Check whether we've already found this module.
+  if (Module *Mod = lookupModuleQualified(ModuleName, Parent))
+    return Mod;
+
+  FileManager &FileMgr = SourceMgr.getFileManager();
+
+  // If the framework has a parent path from which we're allowed to infer
+  // a framework module, do so.
+  const FileEntry *ModuleMapFile = nullptr;
+  if (!Parent) {
+    // Determine whether we're allowed to infer a module map.
+    bool canInfer = false;
+    if (llvm::sys::path::has_parent_path(FrameworkDirName)) {
+      // Figure out the parent path.
+      StringRef Parent = llvm::sys::path::parent_path(FrameworkDirName);
+      if (auto ParentDir = FileMgr.getDirectory(Parent)) {
+        // Check whether we have already looked into the parent directory
+        // for a module map.
+        llvm::DenseMap<const DirectoryEntry *, InferredDirectory>::const_iterator
+          inferred = InferredDirectories.find(*ParentDir);
+        if (inferred == InferredDirectories.end()) {
+          // We haven't looked here before. Load a module map, if there is
+          // one.
+          bool IsFrameworkDir = Parent.endswith(".framework");
+          if (const FileEntry *ModMapFile =
+                HeaderInfo.lookupModuleMapFile(*ParentDir, IsFrameworkDir)) {
+            parseModuleMapFile(ModMapFile, Attrs.IsSystem, *ParentDir);
+            inferred = InferredDirectories.find(*ParentDir);
+          }
+
+          // Record the (possibly negative) result so the parent directory is
+          // only ever examined once.
+          if (inferred == InferredDirectories.end())
+            inferred = InferredDirectories.insert(
+                         std::make_pair(*ParentDir, InferredDirectory())).first;
+        }
+
+        if (inferred->second.InferModules) {
+          // We're allowed to infer for this directory, but make sure it's okay
+          // to infer this particular module.
+          StringRef Name = llvm::sys::path::stem(FrameworkDirName);
+          canInfer = std::find(inferred->second.ExcludedModules.begin(),
+                               inferred->second.ExcludedModules.end(),
+                               Name) == inferred->second.ExcludedModules.end();
+
+          Attrs.IsSystem |= inferred->second.Attrs.IsSystem;
+          Attrs.IsExternC |= inferred->second.Attrs.IsExternC;
+          Attrs.IsExhaustive |= inferred->second.Attrs.IsExhaustive;
+          Attrs.NoUndeclaredIncludes |=
+              inferred->second.Attrs.NoUndeclaredIncludes;
+          ModuleMapFile = inferred->second.ModuleMapFile;
+        }
+      }
+    }
+
+    // If we're not allowed to infer a framework module, don't.
+    if (!canInfer)
+      return nullptr;
+  } else
+    ModuleMapFile = getModuleMapFileForUniquing(Parent);
+
+
+  // Look for an umbrella header.
+  SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName());
+  llvm::sys::path::append(UmbrellaName, "Headers", ModuleName + ".h");
+  auto UmbrellaHeader = FileMgr.getFile(UmbrellaName);
+
+  // FIXME: If there's no umbrella header, we could probably scan the
+  // framework to load *everything*. But, it's not clear that this is a good
+  // idea.
+  if (!UmbrellaHeader)
+    return nullptr;
+
+  Module *Result = new Module(ModuleName, SourceLocation(), Parent,
+                              /*IsFramework=*/true, /*IsExplicit=*/false,
+                              NumCreatedModules++);
+  InferredModuleAllowedBy[Result] = ModuleMapFile;
+  Result->IsInferred = true;
+  if (!Parent) {
+    if (LangOpts.CurrentModule == ModuleName)
+      SourceModule = Result;
+    Modules[ModuleName] = Result;
+    ModuleScopeIDs[Result] = CurrentModuleScopeID;
+  }
+
+  Result->IsSystem |= Attrs.IsSystem;
+  Result->IsExternC |= Attrs.IsExternC;
+  Result->ConfigMacrosExhaustive |= Attrs.IsExhaustive;
+  Result->NoUndeclaredIncludes |= Attrs.NoUndeclaredIncludes;
+  Result->Directory = FrameworkDir;
+
+  // umbrella header "umbrella-header-name"
+  //
+  // The "Headers/" component of the name is implied because this is
+  // a framework module.
+  setUmbrellaHeader(Result, *UmbrellaHeader, ModuleName + ".h");
+
+  // export *
+  Result->Exports.push_back(Module::ExportDecl(nullptr, true));
+
+  // module * { export * }
+  Result->InferSubmodules = true;
+  Result->InferExportWildcard = true;
+
+  // Look for subframeworks.
+  std::error_code EC;
+  SmallString<128> SubframeworksDirName
+    = StringRef(FrameworkDir->getName());
+  llvm::sys::path::append(SubframeworksDirName, "Frameworks");
+  llvm::sys::path::native(SubframeworksDirName);
+  llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
+  for (llvm::vfs::directory_iterator
+           Dir = FS.dir_begin(SubframeworksDirName, EC),
+           DirEnd;
+       Dir != DirEnd && !EC; Dir.increment(EC)) {
+    if (!StringRef(Dir->path()).endswith(".framework"))
+      continue;
+
+    if (auto SubframeworkDir =
+            FileMgr.getDirectory(Dir->path())) {
+      // Note: as an egregious but useful hack, we use the real path here and
+      // check whether it is actually a subdirectory of the parent directory.
+      // This will not be the case if the 'subframework' is actually a symlink
+      // out to a top-level framework.
+      StringRef SubframeworkDirName =
+          FileMgr.getCanonicalName(*SubframeworkDir);
+      bool FoundParent = false;
+      do {
+        // Get the parent directory name.
+        SubframeworkDirName
+          = llvm::sys::path::parent_path(SubframeworkDirName);
+        if (SubframeworkDirName.empty())
+          break;
+
+        if (auto SubDir = FileMgr.getDirectory(SubframeworkDirName)) {
+          if (*SubDir == FrameworkDir) {
+            FoundParent = true;
+            break;
+          }
+        }
+      } while (true);
+
+      if (!FoundParent)
+        continue;
+
+      // FIXME: Do we want to warn about subframeworks without umbrella headers?
+      inferFrameworkModule(*SubframeworkDir, Attrs, Result);
+    }
+  }
+
+  // If the module is a top-level framework, automatically link against the
+  // framework.
+  if (!Result->isSubFramework()) {
+    inferFrameworkLink(Result, FrameworkDir, FileMgr);
+  }
+
+  return Result;
+}
+
+Module *ModuleMap::createShadowedModule(StringRef Name, bool IsFramework,
+                                        Module *ShadowingModule) {
+
+  // Create a new module with this name.
+  Module *Result =
+      new Module(Name, SourceLocation(), /*Parent=*/nullptr, IsFramework,
+                 /*IsExplicit=*/false, NumCreatedModules++);
+  Result->ShadowingModule = ShadowingModule;
+  // Shadowed modules are never usable directly.
+  Result->IsAvailable = false;
+  ModuleScopeIDs[Result] = CurrentModuleScopeID;
+  ShadowModules.push_back(Result);
+
+  return Result;
+}
+
+void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
+                                  Twine NameAsWritten) {
+  Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader));
+  Mod->Umbrella = UmbrellaHeader;
+  Mod->UmbrellaAsWritten = NameAsWritten.str();
+  // The umbrella header's directory is covered by this module as well.
+  UmbrellaDirs[UmbrellaHeader->getDir()] = Mod;
+
+  // Notify callbacks that we just added a new header.
+  for (const auto &Cb : Callbacks)
+    Cb->moduleMapAddUmbrellaHeader(&SourceMgr.getFileManager(), UmbrellaHeader);
+}
+
+void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir,
+                               Twine NameAsWritten) {
+  Mod->Umbrella = UmbrellaDir;
+  Mod->UmbrellaAsWritten = NameAsWritten.str();
+  UmbrellaDirs[UmbrellaDir] = Mod;
+}
+
+void ModuleMap::addUnresolvedHeader(Module *Mod,
+                                    Module::UnresolvedHeaderDirective Header,
+                                    bool &NeedsFramework) {
+  // If there is a builtin counterpart to this file, add it now so it can
+  // wrap the system header.
+  if (resolveAsBuiltinHeader(Mod, Header)) {
+    // If we have both a builtin and system version of the file, the
+    // builtin version may want to inject macros into the system header, so
+    // force the system header to be treated as a textual header in this
+    // case.
+    Header.Kind = headerRoleToKind(ModuleMap::ModuleHeaderRole(
+        headerKindToRole(Header.Kind) | ModuleMap::TextualHeader));
+    Header.HasBuiltinHeader = true;
+  }
+
+  // If possible, don't stat the header until we need to. This requires the
+  // user to have provided us with some stat information about the file.
+  // FIXME: Add support for lazily stat'ing umbrella headers and excluded
+  // headers.
+  if ((Header.Size || Header.ModTime) && !Header.IsUmbrella &&
+      Header.Kind != Module::HK_Excluded) {
+    // We expect more variation in mtime than size, so if we're given both,
+    // use the mtime as the key.
+    if (Header.ModTime)
+      LazyHeadersByModTime[*Header.ModTime].push_back(Mod);
+    else
+      LazyHeadersBySize[*Header.Size].push_back(Mod);
+    Mod->UnresolvedHeaders.push_back(Header);
+    return;
+  }
+
+  // We don't have stat information or can't defer looking this file up.
+  // Perform the lookup now.
+  resolveHeader(Mod, Header, NeedsFramework);
+}
+
+/// Resolve the lazy header directives of any module whose recorded size or
+/// mtime matches \p File.
+void ModuleMap::resolveHeaderDirectives(const FileEntry *File) const {
+  auto BySize = LazyHeadersBySize.find(File->getSize());
+  if (BySize != LazyHeadersBySize.end()) {
+    for (auto *M : BySize->second)
+      resolveHeaderDirectives(M);
+    LazyHeadersBySize.erase(BySize);
+  }
+
+  auto ByModTime = LazyHeadersByModTime.find(File->getModificationTime());
+  if (ByModTime != LazyHeadersByModTime.end()) {
+    for (auto *M : ByModTime->second)
+      resolveHeaderDirectives(M);
+    LazyHeadersByModTime.erase(ByModTime);
+  }
+}
+
+void ModuleMap::resolveHeaderDirectives(Module *Mod) const {
+  bool NeedsFramework = false;
+  for (auto &Header : Mod->UnresolvedHeaders)
+    // This operation is logically const; we're just changing how we represent
+    // the header information for this file.
+    const_cast<ModuleMap*>(this)->resolveHeader(Mod, Header, NeedsFramework);
+  Mod->UnresolvedHeaders.clear();
+}
+
+void ModuleMap::addHeader(Module *Mod, Module::Header Header,
+                          ModuleHeaderRole Role, bool Imported) {
+  KnownHeader KH(Mod, Role);
+
+  // Only add each header to the headers list once.
+  // FIXME: Should we diagnose if a header is listed twice in the
+  // same module definition?
+  auto &HeaderList = Headers[Header.Entry];
+  for (auto H : HeaderList)
+    if (H == KH)
+      return;
+
+  HeaderList.push_back(KH);
+  Mod->Headers[headerRoleToKind(Role)].push_back(Header);
+
+  bool isCompilingModuleHeader =
+      LangOpts.isCompilingModule() && Mod->getTopLevelModule() == SourceModule;
+  if (!Imported || isCompilingModuleHeader) {
+    // When we import HeaderFileInfo, the external source is expected to
+    // set the isModuleHeader flag itself.
+    HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
+                                    isCompilingModuleHeader);
+  }
+
+  // Notify callbacks that we just added a new header.
+  for (const auto &Cb : Callbacks)
+    Cb->moduleMapAddHeader(Header.Entry->getName());
+}
+
+void ModuleMap::excludeHeader(Module *Mod, Module::Header Header) {
+  // Add this as a known header so we won't implicitly add it to any
+  // umbrella directory module.
+  // FIXME: Should we only exclude it from umbrella modules within the
+  // specified module?
+  (void) Headers[Header.Entry];
+
+  Mod->Headers[Module::HK_Excluded].push_back(std::move(Header));
+}
+
+const FileEntry *
+ModuleMap::getContainingModuleMapFile(const Module *Module) const {
+  if (Module->DefinitionLoc.isInvalid())
+    return nullptr;
+
+  return SourceMgr.getFileEntryForID(
+           SourceMgr.getFileID(Module->DefinitionLoc));
+}
+
+const FileEntry *ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
+  // Inferred modules are attributed to the module map that permitted the
+  // inference rather than to the file containing their definition.
+  if (M->IsInferred) {
+    assert(InferredModuleAllowedBy.count(M) && "missing inferred module map");
+    return InferredModuleAllowedBy.find(M)->second;
+  }
+  return getContainingModuleMapFile(M);
+}
+
+void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {
+  assert(M->IsInferred && "module not inferred");
+  InferredModuleAllowedBy[M] = ModMap;
+}
+
+LLVM_DUMP_METHOD void ModuleMap::dump() {
+  llvm::errs() << "Modules:";
+  for (llvm::StringMap<Module *>::iterator M = Modules.begin(),
+                                        MEnd = Modules.end();
+       M != MEnd; ++M)
+    M->getValue()->print(llvm::errs(), 2);
+
+  llvm::errs() << "Headers:";
+  for (HeadersMap::iterator H = Headers.begin(), HEnd = Headers.end();
+       H != HEnd; ++H) {
+    llvm::errs() << "  \"" << H->first->getName() << "\" -> ";
+    for (SmallVectorImpl<KnownHeader>::const_iterator I = H->second.begin(),
+                                                      E = H->second.end();
+         I != E; ++I) {
+      if (I != H->second.begin())
+        llvm::errs() << ",";
+      llvm::errs() << I->getModule()->getFullModuleName();
+    }
+    llvm::errs() << "\n";
+  }
+}
+
+/// Re-attempt resolution of this module's unresolved exports; returns true
+/// if any remain unresolved afterwards.
+bool ModuleMap::resolveExports(Module *Mod, bool Complain) {
+  auto Unresolved = std::move(Mod->UnresolvedExports);
+  Mod->UnresolvedExports.clear();
+  for (auto &UE : Unresolved) {
+    Module::ExportDecl Export = resolveExport(Mod, UE, Complain);
+    if (Export.getPointer() || Export.getInt())
+      Mod->Exports.push_back(Export);
+    else
+      Mod->UnresolvedExports.push_back(UE);
+  }
+  return !Mod->UnresolvedExports.empty();
+}
+
+/// Re-attempt resolution of this module's unresolved 'use' declarations;
+/// returns true if any remain unresolved afterwards.
+bool ModuleMap::resolveUses(Module *Mod, bool Complain) {
+  auto Unresolved = std::move(Mod->UnresolvedDirectUses);
+  Mod->UnresolvedDirectUses.clear();
+  for (auto &UDU : Unresolved) {
+    Module *DirectUse = resolveModuleId(UDU, Mod, Complain);
+    if (DirectUse)
+      Mod->DirectUses.push_back(DirectUse);
+    else
+      Mod->UnresolvedDirectUses.push_back(UDU);
+  }
+  return !Mod->UnresolvedDirectUses.empty();
+}
+
+/// Re-attempt resolution of this module's unresolved conflict declarations;
+/// returns true if any remain unresolved afterwards.
+bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
+  auto Unresolved = std::move(Mod->UnresolvedConflicts);
+  Mod->UnresolvedConflicts.clear();
+  for (auto &UC : Unresolved) {
+    if (Module *OtherMod = resolveModuleId(UC.Id, Mod, Complain)) {
+      Module::Conflict Conflict;
+      Conflict.Other = OtherMod;
+      Conflict.Message = UC.Message;
+      Mod->Conflicts.push_back(Conflict);
+    } else
+      Mod->UnresolvedConflicts.push_back(UC);
+  }
+  return !Mod->UnresolvedConflicts.empty();
+}
+
+//----------------------------------------------------------------------------//
+// Module map file parser
+//----------------------------------------------------------------------------//
+
+namespace clang {
+
+  /// A token in a module map file.
+  struct MMToken {
+    enum TokenKind {
+      Comma,
+      ConfigMacros,
+      Conflict,
+      EndOfFile,
+      HeaderKeyword,
+      Identifier,
+      Exclaim,
+      ExcludeKeyword,
+      ExplicitKeyword,
+      ExportKeyword,
+      ExportAsKeyword,
+      ExternKeyword,
+      FrameworkKeyword,
+      LinkKeyword,
+      ModuleKeyword,
+      Period,
+      PrivateKeyword,
+      UmbrellaKeyword,
+      UseKeyword,
+      RequiresKeyword,
+      Star,
+      StringLiteral,
+      IntegerLiteral,
+      TextualKeyword,
+      LBrace,
+      RBrace,
+      LSquare,
+      RSquare
+    } Kind;
+
+    // Raw encoding of the token's SourceLocation (see getLocation()).
+    unsigned Location;
+    unsigned StringLength;
+    union {
+      // If Kind != IntegerLiteral.
+      const char *StringData;
+
+      // If Kind == IntegerLiteral.
+      uint64_t IntegerValue;
+    };
+
+    // Reset to a default EndOfFile token.
+    void clear() {
+      Kind = EndOfFile;
+      Location = 0;
+      StringLength = 0;
+      StringData = nullptr;
+    }
+
+    bool is(TokenKind K) const { return Kind == K; }
+
+    SourceLocation getLocation() const {
+      return SourceLocation::getFromRawEncoding(Location);
+    }
+
+    uint64_t getInteger() const {
+      return Kind == IntegerLiteral ? IntegerValue : 0;
+    }
+
+    StringRef getString() const {
+      return Kind == IntegerLiteral ? StringRef()
+                                    : StringRef(StringData, StringLength);
+    }
+  };
+
+  /// Recursive-descent parser for the module map language, driving a raw
+  /// Lexer over the module map file.
+  class ModuleMapParser {
+    Lexer &L;
+    SourceManager &SourceMgr;
+
+    /// Default target information, used only for string literal
+    /// parsing.
+    const TargetInfo *Target;
+
+    DiagnosticsEngine &Diags;
+    ModuleMap &Map;
+
+    /// The current module map file.
+    const FileEntry *ModuleMapFile;
+
+    /// Source location of most recent parsed module declaration
+    SourceLocation CurrModuleDeclLoc;
+
+    /// The directory that file names in this module map file should
+    /// be resolved relative to.
+    const DirectoryEntry *Directory;
+
+    /// Whether this module map is in a system header directory.
+    bool IsSystem;
+
+    /// Whether an error occurred.
+    bool HadError = false;
+
+    /// Stores string data for the various string literals referenced
+    /// during parsing.
+    llvm::BumpPtrAllocator StringData;
+
+    /// The current token.
+    MMToken Tok;
+
+    /// The active module.
+    Module *ActiveModule = nullptr;
+
+    /// Whether a module uses the 'requires excluded' hack to mark its
+    /// contents as 'textual'.
+    ///
+    /// On older Darwin SDK versions, 'requires excluded' is used to mark the
+    /// contents of the Darwin.C.excluded (assert.h) and Tcl.Private modules as
+    /// non-modular headers.  For backwards compatibility, we continue to
+    /// support this idiom for just these modules, and map the headers to
+    /// 'textual' to match the original intent.
+    llvm::SmallPtrSet<Module *, 2> UsesRequiresExcludedHack;
+
+    /// Consume the current token and return its location.
+    SourceLocation consumeToken();
+
+    /// Skip tokens until we reach the a token with the given kind
+    /// (or the end of the file).
+    void skipUntil(MMToken::TokenKind K);
+
+    using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
+
+    bool parseModuleId(ModuleId &Id);
+    void parseModuleDecl();
+    void parseExternModuleDecl();
+    void parseRequiresDecl();
+    void parseHeaderDecl(MMToken::TokenKind, SourceLocation LeadingLoc);
+    void parseUmbrellaDirDecl(SourceLocation UmbrellaLoc);
+    void parseExportDecl();
+    void parseExportAsDecl();
+    void parseUseDecl();
+    void parseLinkDecl();
+    void parseConfigMacros();
+    void parseConflict();
+    void parseInferredModuleDecl(bool Framework, bool Explicit);
+
+    /// Private modules are canonicalized as Foo_Private. Clang provides extra
+    /// module map search logic to find the appropriate private module when PCH
+    /// is used with implicit module maps. Warn when private modules are written
+    /// in other ways (FooPrivate and Foo.Private), providing notes and fixits.
+    void diagnosePrivateModules(SourceLocation ExplicitLoc,
+                                SourceLocation FrameworkLoc);
+
+    using Attributes = ModuleMap::Attributes;
+
+    bool parseOptionalAttributes(Attributes &Attrs);
+
+  public:
+    explicit ModuleMapParser(Lexer &L, SourceManager &SourceMgr,
+                             const TargetInfo *Target, DiagnosticsEngine &Diags,
+                             ModuleMap &Map, const FileEntry *ModuleMapFile,
+                             const DirectoryEntry *Directory, bool IsSystem)
+        : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map),
+          ModuleMapFile(ModuleMapFile), Directory(Directory),
+          IsSystem(IsSystem) {
+      // Prime Tok with the first token of the file.
+      Tok.clear();
+      consumeToken();
+    }
+
+    bool parseModuleMapFile();
+
+    bool terminatedByDirective() { return false; }
+    SourceLocation getLocation() { return Tok.getLocation(); }
+  };
+
+} // namespace clang
+
+// Lex one token from the module map with the raw lexer and classify raw
+// identifiers into module-map keywords; returns the location of the token
+// that was just consumed.
+SourceLocation ModuleMapParser::consumeToken() {
+  SourceLocation Result = Tok.getLocation();
+
+retry:
+  Tok.clear();
+  Token LToken;
+  L.LexFromRawLexer(LToken);
+  Tok.Location = LToken.getLocation().getRawEncoding();
+  switch (LToken.getKind()) {
+  case tok::raw_identifier: {
+    StringRef RI = LToken.getRawIdentifier();
+    Tok.StringData = RI.data();
+    Tok.StringLength = RI.size();
+    Tok.Kind = llvm::StringSwitch<MMToken::TokenKind>(RI)
+                 .Case("config_macros", MMToken::ConfigMacros)
+                 .Case("conflict", MMToken::Conflict)
+                 .Case("exclude", MMToken::ExcludeKeyword)
+                 .Case("explicit", MMToken::ExplicitKeyword)
+                 .Case("export", MMToken::ExportKeyword)
+                 
.Case("export_as", MMToken::ExportAsKeyword) +                 .Case("extern", MMToken::ExternKeyword) +                 .Case("framework", MMToken::FrameworkKeyword) +                 .Case("header", MMToken::HeaderKeyword) +                 .Case("link", MMToken::LinkKeyword) +                 .Case("module", MMToken::ModuleKeyword) +                 .Case("private", MMToken::PrivateKeyword) +                 .Case("requires", MMToken::RequiresKeyword) +                 .Case("textual", MMToken::TextualKeyword) +                 .Case("umbrella", MMToken::UmbrellaKeyword) +                 .Case("use", MMToken::UseKeyword) +                 .Default(MMToken::Identifier); +    break; +  } + +  case tok::comma: +    Tok.Kind = MMToken::Comma; +    break; + +  case tok::eof: +    Tok.Kind = MMToken::EndOfFile; +    break; + +  case tok::l_brace: +    Tok.Kind = MMToken::LBrace; +    break; + +  case tok::l_square: +    Tok.Kind = MMToken::LSquare; +    break; + +  case tok::period: +    Tok.Kind = MMToken::Period; +    break; + +  case tok::r_brace: +    Tok.Kind = MMToken::RBrace; +    break; + +  case tok::r_square: +    Tok.Kind = MMToken::RSquare; +    break; + +  case tok::star: +    Tok.Kind = MMToken::Star; +    break; + +  case tok::exclaim: +    Tok.Kind = MMToken::Exclaim; +    break; + +  case tok::string_literal: { +    if (LToken.hasUDSuffix()) { +      Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl); +      HadError = true; +      goto retry; +    } + +    // Parse the string literal. +    LangOptions LangOpts; +    StringLiteralParser StringLiteral(LToken, SourceMgr, LangOpts, *Target); +    if (StringLiteral.hadError) +      goto retry; + +    // Copy the string literal into our string data allocator. +    unsigned Length = StringLiteral.GetStringLength(); +    char *Saved = StringData.Allocate<char>(Length + 1); +    memcpy(Saved, StringLiteral.GetString().data(), Length); +    Saved[Length] = 0; + +    // Form the token. 
+    Tok.Kind = MMToken::StringLiteral; +    Tok.StringData = Saved; +    Tok.StringLength = Length; +    break; +  } + +  case tok::numeric_constant: { +    // We don't support any suffixes or other complications. +    SmallString<32> SpellingBuffer; +    SpellingBuffer.resize(LToken.getLength() + 1); +    const char *Start = SpellingBuffer.data(); +    unsigned Length = +        Lexer::getSpelling(LToken, Start, SourceMgr, L.getLangOpts()); +    uint64_t Value; +    if (StringRef(Start, Length).getAsInteger(0, Value)) { +      Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token); +      HadError = true; +      goto retry; +    } + +    Tok.Kind = MMToken::IntegerLiteral; +    Tok.IntegerValue = Value; +    break; +  } + +  case tok::comment: +    goto retry; + +  case tok::hash: +    // A module map can be terminated prematurely by +    //   #pragma clang module contents +    // When building the module, we'll treat the rest of the file as the +    // contents of the module. +    { +      auto NextIsIdent = [&](StringRef Str) -> bool { +        L.LexFromRawLexer(LToken); +        return !LToken.isAtStartOfLine() && LToken.is(tok::raw_identifier) && +               LToken.getRawIdentifier() == Str; +      }; +      if (NextIsIdent("pragma") && NextIsIdent("clang") && +          NextIsIdent("module") && NextIsIdent("contents")) { +        Tok.Kind = MMToken::EndOfFile; +        break; +      } +    } +    LLVM_FALLTHROUGH; + +  default: +    Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token); +    HadError = true; +    goto retry; +  } + +  return Result; +} + +void ModuleMapParser::skipUntil(MMToken::TokenKind K) { +  unsigned braceDepth = 0; +  unsigned squareDepth = 0; +  do { +    switch (Tok.Kind) { +    case MMToken::EndOfFile: +      return; + +    case MMToken::LBrace: +      if (Tok.is(K) && braceDepth == 0 && squareDepth == 0) +        return; + +      ++braceDepth; +      break; + +    case MMToken::LSquare: +      if (Tok.is(K) && 
braceDepth == 0 && squareDepth == 0) +        return; + +      ++squareDepth; +      break; + +    case MMToken::RBrace: +      if (braceDepth > 0) +        --braceDepth; +      else if (Tok.is(K)) +        return; +      break; + +    case MMToken::RSquare: +      if (squareDepth > 0) +        --squareDepth; +      else if (Tok.is(K)) +        return; +      break; + +    default: +      if (braceDepth == 0 && squareDepth == 0 && Tok.is(K)) +        return; +      break; +    } + +   consumeToken(); +  } while (true); +} + +/// Parse a module-id. +/// +///   module-id: +///     identifier +///     identifier '.' module-id +/// +/// \returns true if an error occurred, false otherwise. +bool ModuleMapParser::parseModuleId(ModuleId &Id) { +  Id.clear(); +  do { +    if (Tok.is(MMToken::Identifier) || Tok.is(MMToken::StringLiteral)) { +      Id.push_back(std::make_pair(Tok.getString(), Tok.getLocation())); +      consumeToken(); +    } else { +      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module_name); +      return true; +    } + +    if (!Tok.is(MMToken::Period)) +      break; + +    consumeToken(); +  } while (true); + +  return false; +} + +namespace { + +  /// Enumerates the known attributes. +  enum AttributeKind { +    /// An unknown attribute. +    AT_unknown, + +    /// The 'system' attribute. +    AT_system, + +    /// The 'extern_c' attribute. +    AT_extern_c, + +    /// The 'exhaustive' attribute. +    AT_exhaustive, + +    /// The 'no_undeclared_includes' attribute. +    AT_no_undeclared_includes +  }; + +} // namespace + +/// Private modules are canonicalized as Foo_Private. Clang provides extra +/// module map search logic to find the appropriate private module when PCH +/// is used with implicit module maps. Warn when private modules are written +/// in other ways (FooPrivate and Foo.Private), providing notes and fixits. 
void ModuleMapParser::diagnosePrivateModules(SourceLocation ExplicitLoc,
                                             SourceLocation FrameworkLoc) {
  // Emit a note pointing at the clashing module plus a fixit that rewrites
  // the bad spelling to the canonical Foo_Private form.
  auto GenNoteAndFixIt = [&](StringRef BadName, StringRef Canonical,
                             const Module *M, SourceRange ReplLoc) {
    auto D = Diags.Report(ActiveModule->DefinitionLoc,
                          diag::note_mmap_rename_top_level_private_module);
    D << BadName << M->Name;
    D << FixItHint::CreateReplacement(ReplLoc, Canonical);
  };

  for (auto E = Map.module_begin(); E != Map.module_end(); ++E) {
    auto const *M = E->getValue();
    // Only consider modules declared in the same directory.
    if (M->Directory != ActiveModule->Directory)
      continue;

    SmallString<128> FullName(ActiveModule->getFullModuleName());
    if (!FullName.startswith(M->Name) && !FullName.endswith("Private"))
      continue;
    SmallString<128> FixedPrivModDecl;
    SmallString<128> Canonical(M->Name);
    Canonical.append("_Private");

    // Foo.Private -> Foo_Private
    if (ActiveModule->Parent && ActiveModule->Name == "Private" && !M->Parent &&
        M->Name == ActiveModule->Parent->Name) {
      Diags.Report(ActiveModule->DefinitionLoc,
                   diag::warn_mmap_mismatched_private_submodule)
          << FullName;

      // Anchor the fixit at the earliest keyword of the declaration
      // ('explicit' before 'framework' before 'module').
      SourceLocation FixItInitBegin = CurrModuleDeclLoc;
      if (FrameworkLoc.isValid())
        FixItInitBegin = FrameworkLoc;
      if (ExplicitLoc.isValid())
        FixItInitBegin = ExplicitLoc;

      if (FrameworkLoc.isValid() || ActiveModule->Parent->IsFramework)
        FixedPrivModDecl.append("framework ");
      FixedPrivModDecl.append("module ");
      FixedPrivModDecl.append(Canonical);

      GenNoteAndFixIt(FullName, FixedPrivModDecl, M,
                      SourceRange(FixItInitBegin, ActiveModule->DefinitionLoc));
      continue;
    }

    // FooPrivate and whatnots -> Foo_Private
    if (!ActiveModule->Parent && !M->Parent && M->Name != ActiveModule->Name &&
        ActiveModule->Name != Canonical) {
      Diags.Report(ActiveModule->DefinitionLoc,
                   diag::warn_mmap_mismatched_private_module_name)
          << ActiveModule->Name;
      GenNoteAndFixIt(ActiveModule->Name, Canonical, M,
                      SourceRange(ActiveModule->DefinitionLoc));
    }
  }
}

/// Parse a module declaration.
///
///   module-declaration:
///     'extern' 'module' module-id string-literal
///     'explicit'[opt] 'framework'[opt] 'module' module-id attributes[opt]
///       { module-member* }
///
///   module-member:
///     requires-declaration
///     header-declaration
///     submodule-declaration
///     export-declaration
///     export-as-declaration
///     link-declaration
///
///   submodule-declaration:
///     module-declaration
///     inferred-submodule-declaration
void ModuleMapParser::parseModuleDecl() {
  assert(Tok.is(MMToken::ExplicitKeyword) || Tok.is(MMToken::ModuleKeyword) ||
         Tok.is(MMToken::FrameworkKeyword) || Tok.is(MMToken::ExternKeyword));
  if (Tok.is(MMToken::ExternKeyword)) {
    parseExternModuleDecl();
    return;
  }

  // Parse 'explicit' or 'framework' keyword, if present.
  SourceLocation ExplicitLoc;
  SourceLocation FrameworkLoc;
  bool Explicit = false;
  bool Framework = false;

  // Parse 'explicit' keyword, if present.
  if (Tok.is(MMToken::ExplicitKeyword)) {
    ExplicitLoc = consumeToken();
    Explicit = true;
  }

  // Parse 'framework' keyword, if present.
  if (Tok.is(MMToken::FrameworkKeyword)) {
    FrameworkLoc = consumeToken();
    Framework = true;
  }

  // Parse 'module' keyword.
  if (!Tok.is(MMToken::ModuleKeyword)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
    consumeToken();
    HadError = true;
    return;
  }
  CurrModuleDeclLoc = consumeToken(); // 'module' keyword

  // If we have a wildcard for the module name, this is an inferred submodule.
  // Parse it.
  if (Tok.is(MMToken::Star))
    return parseInferredModuleDecl(Framework, Explicit);

  // Parse the module name.
  ModuleId Id;
  if (parseModuleId(Id)) {
    HadError = true;
    return;
  }

  if (ActiveModule) {
    // A qualified name inside another module declaration is not allowed.
    if (Id.size() > 1) {
      Diags.Report(Id.front().second, diag::err_mmap_nested_submodule_id)
        << SourceRange(Id.front().second, Id.back().second);

      HadError = true;
      return;
    }
  } else if (Id.size() == 1 && Explicit) {
    // Top-level modules can't be explicit.
    Diags.Report(ExplicitLoc, diag::err_mmap_explicit_top_level);
    Explicit = false;
    ExplicitLoc = SourceLocation();
    HadError = true;
  }

  Module *PreviousActiveModule = ActiveModule;
  if (Id.size() > 1) {
    // This module map defines a submodule. Go find the module of which it
    // is a submodule.
    ActiveModule = nullptr;
    const Module *TopLevelModule = nullptr;
    for (unsigned I = 0, N = Id.size() - 1; I != N; ++I) {
      if (Module *Next = Map.lookupModuleQualified(Id[I].first, ActiveModule)) {
        if (I == 0)
          TopLevelModule = Next;
        ActiveModule = Next;
        continue;
      }

      if (ActiveModule) {
        Diags.Report(Id[I].second, diag::err_mmap_missing_module_qualified)
          << Id[I].first
          << ActiveModule->getTopLevelModule()->getFullModuleName();
      } else {
        Diags.Report(Id[I].second, diag::err_mmap_expected_module_name);
      }
      HadError = true;
      return;
    }

    if (ModuleMapFile != Map.getContainingModuleMapFile(TopLevelModule)) {
      assert(ModuleMapFile != Map.getModuleMapFileForUniquing(TopLevelModule) &&
             "submodule defined in same file as 'module *' that allowed its "
             "top-level module");
      // Record that this module map file is an additional source of
      // information about the top-level module.
      Map.addAdditionalModuleMapFile(TopLevelModule, ModuleMapFile);
    }
  }

  StringRef ModuleName = Id.back().first;
  SourceLocation ModuleNameLoc = Id.back().second;

  // Parse the optional attribute list.
  Attributes Attrs;
  if (parseOptionalAttributes(Attrs))
    return;

  // Parse the opening brace.
  if (!Tok.is(MMToken::LBrace)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace)
      << ModuleName;
    HadError = true;
    return;
  }
  SourceLocation LBraceLoc = consumeToken();

  // Determine whether this (sub)module has already been defined.
  Module *ShadowingModule = nullptr;
  if (Module *Existing = Map.lookupModuleQualified(ModuleName, ActiveModule)) {
    // We might see a (re)definition of a module that we already have a
    // definition for in two cases:
    //  - If we loaded one definition from an AST file and we've just found a
    //    corresponding definition in a module map file, or
    bool LoadedFromASTFile = Existing->DefinitionLoc.isInvalid();
    //  - If we're building a (preprocessed) module and we've just loaded the
    //    module map file from which it was created.
    bool ParsedAsMainInput =
        Map.LangOpts.getCompilingModule() == LangOptions::CMK_ModuleMap &&
        Map.LangOpts.CurrentModule == ModuleName &&
        SourceMgr.getDecomposedLoc(ModuleNameLoc).first !=
            SourceMgr.getDecomposedLoc(Existing->DefinitionLoc).first;
    if (!ActiveModule && (LoadedFromASTFile || ParsedAsMainInput)) {
      // Skip the module definition.
      skipUntil(MMToken::RBrace);
      if (Tok.is(MMToken::RBrace))
        consumeToken();
      else {
        Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
        Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
        HadError = true;
      }
      return;
    }

    if (!Existing->Parent && Map.mayShadowNewModule(Existing)) {
      ShadowingModule = Existing;
    } else {
      // This is not a shadowed module decl, it is an illegal redefinition.
      Diags.Report(ModuleNameLoc, diag::err_mmap_module_redefinition)
          << ModuleName;
      Diags.Report(Existing->DefinitionLoc, diag::note_mmap_prev_definition);

      // Skip the module definition.
      skipUntil(MMToken::RBrace);
      if (Tok.is(MMToken::RBrace))
        consumeToken();

      HadError = true;
      return;
    }
  }

  // Start defining this module.
  if (ShadowingModule) {
    ActiveModule =
        Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
  } else {
    ActiveModule =
        Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
            .first;
  }

  ActiveModule->DefinitionLoc = ModuleNameLoc;
  if (Attrs.IsSystem || IsSystem)
    ActiveModule->IsSystem = true;
  if (Attrs.IsExternC)
    ActiveModule->IsExternC = true;
  if (Attrs.NoUndeclaredIncludes ||
      (!ActiveModule->Parent && ModuleName == "Darwin"))
    ActiveModule->NoUndeclaredIncludes = true;
  ActiveModule->Directory = Directory;

  StringRef MapFileName(ModuleMapFile->getName());
  if (MapFileName.endswith("module.private.modulemap") ||
      MapFileName.endswith("module_private.map")) {
    ActiveModule->ModuleMapIsPrivate = true;
  }

  // Private modules named as FooPrivate, Foo.Private or similar are likely a
  // user error; provide warnings, notes and fixits to direct users to use
  // Foo_Private instead.
  SourceLocation StartLoc =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  if (Map.HeaderInfo.getHeaderSearchOpts().ImplicitModuleMaps &&
      !Diags.isIgnored(diag::warn_mmap_mismatched_private_submodule,
                       StartLoc) &&
      !Diags.isIgnored(diag::warn_mmap_mismatched_private_module_name,
                       StartLoc) &&
      ActiveModule->ModuleMapIsPrivate)
    diagnosePrivateModules(ExplicitLoc, FrameworkLoc);

  // Parse the body: dispatch on each member keyword until '}' or EOF.
  bool Done = false;
  do {
    switch (Tok.Kind) {
    case MMToken::EndOfFile:
    case MMToken::RBrace:
      Done = true;
      break;

    case MMToken::ConfigMacros:
      parseConfigMacros();
      break;

    case MMToken::Conflict:
      parseConflict();
      break;

    case MMToken::ExplicitKeyword:
    case MMToken::ExternKeyword:
    case MMToken::FrameworkKeyword:
    case MMToken::ModuleKeyword:
      parseModuleDecl();
      break;

    case MMToken::ExportKeyword:
      parseExportDecl();
      break;

    case MMToken::ExportAsKeyword:
      parseExportAsDecl();
      break;

    case MMToken::UseKeyword:
      parseUseDecl();
      break;

    case MMToken::RequiresKeyword:
      parseRequiresDecl();
      break;

    case MMToken::TextualKeyword:
      parseHeaderDecl(MMToken::TextualKeyword, consumeToken());
      break;

    case MMToken::UmbrellaKeyword: {
      SourceLocation UmbrellaLoc = consumeToken();
      if (Tok.is(MMToken::HeaderKeyword))
        parseHeaderDecl(MMToken::UmbrellaKeyword, UmbrellaLoc);
      else
        parseUmbrellaDirDecl(UmbrellaLoc);
      break;
    }

    case MMToken::ExcludeKeyword:
      parseHeaderDecl(MMToken::ExcludeKeyword, consumeToken());
      break;

    case MMToken::PrivateKeyword:
      parseHeaderDecl(MMToken::PrivateKeyword, consumeToken());
      break;

    case MMToken::HeaderKeyword:
      parseHeaderDecl(MMToken::HeaderKeyword, consumeToken());
      break;

    case MMToken::LinkKeyword:
      parseLinkDecl();
      break;

    default:
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_member);
      consumeToken();
      break;
    }
  } while (!Done);

  if (Tok.is(MMToken::RBrace))
    consumeToken();
  else {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
    Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
    HadError = true;
  }

  // If the active module is a top-level framework, and there are no link
  // libraries, automatically link against the framework.
  if (ActiveModule->IsFramework && !ActiveModule->isSubFramework() &&
      ActiveModule->LinkLibraries.empty()) {
    inferFrameworkLink(ActiveModule, Directory, SourceMgr.getFileManager());
  }

  // If the module meets all requirements but is still unavailable, mark the
  // whole tree as unavailable to prevent it from building.
  if (!ActiveModule->IsAvailable && !ActiveModule->IsMissingRequirement &&
      ActiveModule->Parent) {
    ActiveModule->getTopLevelModule()->markUnavailable();
    ActiveModule->getTopLevelModule()->MissingHeaders.append(
      ActiveModule->MissingHeaders.begin(), ActiveModule->MissingHeaders.end());
  }

  // We're done parsing this module. Pop back to the previous module.
  ActiveModule = PreviousActiveModule;
}

/// Parse an extern module declaration.
///
///   extern module-declaration:
///     'extern' 'module' module-id string-literal
void ModuleMapParser::parseExternModuleDecl() {
  assert(Tok.is(MMToken::ExternKeyword));
  SourceLocation ExternLoc = consumeToken(); // 'extern' keyword

  // Parse 'module' keyword.
  if (!Tok.is(MMToken::ModuleKeyword)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
    consumeToken();
    HadError = true;
    return;
  }
  consumeToken(); // 'module' keyword

  // Parse the module name.
  ModuleId Id;
  if (parseModuleId(Id)) {
    HadError = true;
    return;
  }

  // Parse the referenced module map file name.
  if (!Tok.is(MMToken::StringLiteral)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_mmap_file);
    HadError = true;
    return;
  }
  std::string FileName = Tok.getString();
  consumeToken(); // filename

  // Resolve a relative file name against this module map's directory.
  StringRef FileNameRef = FileName;
  SmallString<128> ModuleMapFileName;
  if (llvm::sys::path::is_relative(FileNameRef)) {
    ModuleMapFileName += Directory->getName();
    llvm::sys::path::append(ModuleMapFileName, FileName);
    FileNameRef = ModuleMapFileName;
  }
  // NOTE(review): a missing file is silently ignored here (no diagnostic) —
  // presumably intentional best-effort behavior; confirm before changing.
  if (auto File = SourceMgr.getFileManager().getFile(FileNameRef))
    Map.parseModuleMapFile(
        *File, /*IsSystem=*/false,
        Map.HeaderInfo.getHeaderSearchOpts().ModuleMapFileHomeIsCwd
            ? Directory
            : (*File)->getDir(),
        FileID(), nullptr, ExternLoc);
}

/// Whether to add the requirement \p Feature to the module \p M.
///
/// This preserves backwards compatibility for two hacks in the Darwin system
/// module map files:
///
/// 1. The use of 'requires excluded' to make headers non-modular, which
///    should really be mapped to 'textual' now that we have this feature.  We
///    drop the 'excluded' requirement, and set \p IsRequiresExcludedHack to
///    true.  Later, this bit will be used to map all the headers inside this
///    module to 'textual'.
///
///    This affects Darwin.C.excluded (for assert.h) and Tcl.Private.
///
/// 2. Removes a bogus cplusplus requirement from IOKit.avc.  This requirement
///    was never correct and causes issues now that we check it, so drop it.
static bool shouldAddRequirement(Module *M, StringRef Feature,
                                 bool &IsRequiresExcludedHack) {
  if (Feature == "excluded" &&
      (M->fullModuleNameIs({"Darwin", "C", "excluded"}) ||
       M->fullModuleNameIs({"Tcl", "Private"}))) {
    IsRequiresExcludedHack = true;
    return false;
  } else if (Feature == "cplusplus" && M->fullModuleNameIs({"IOKit", "avc"})) {
    return false;
  }

  return true;
}

/// Parse a requires declaration.
///
///   requires-declaration:
///     'requires' feature-list
///
///   feature-list:
///     feature ',' feature-list
///     feature
///
///   feature:
///     '!'[opt] identifier
void ModuleMapParser::parseRequiresDecl() {
  assert(Tok.is(MMToken::RequiresKeyword));

  // Parse 'requires' keyword.
  consumeToken();

  // Parse the feature-list.
  do {
    // A leading '!' negates the requirement.
    bool RequiredState = true;
    if (Tok.is(MMToken::Exclaim)) {
      RequiredState = false;
      consumeToken();
    }

    if (!Tok.is(MMToken::Identifier)) {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_feature);
      HadError = true;
      return;
    }

    // Consume the feature name.
    std::string Feature = Tok.getString();
    consumeToken();

    bool IsRequiresExcludedHack = false;
    bool ShouldAddRequirement =
        shouldAddRequirement(ActiveModule, Feature, IsRequiresExcludedHack);

    if (IsRequiresExcludedHack)
      UsesRequiresExcludedHack.insert(ActiveModule);

    if (ShouldAddRequirement) {
      // Add this feature.
      ActiveModule->addRequirement(Feature, RequiredState, Map.LangOpts,
                                   *Map.Target);
    }

    if (!Tok.is(MMToken::Comma))
      break;

    // Consume the comma.
    consumeToken();
  } while (true);
}

/// Parse a header declaration.
///
///   header-declaration:
///     'textual'[opt] 'header' string-literal
///     'private' 'textual'[opt] 'header' string-literal
///     'exclude' 'header' string-literal
///     'umbrella' 'header' string-literal
///
/// FIXME: Support 'private textual header'.
void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,
                                      SourceLocation LeadingLoc) {
  // We've already consumed the first token.
  ModuleMap::ModuleHeaderRole Role = ModuleMap::NormalHeader;
  if (LeadingToken == MMToken::PrivateKeyword) {
    Role = ModuleMap::PrivateHeader;
    // 'private' may optionally be followed by 'textual'.
    if (Tok.is(MMToken::TextualKeyword)) {
      LeadingToken = Tok.Kind;
      consumeToken();
    }
  }

  if (LeadingToken == MMToken::TextualKeyword)
    Role = ModuleMap::ModuleHeaderRole(Role | ModuleMap::TextualHeader);

  if (UsesRequiresExcludedHack.count(ActiveModule)) {
    // Mark this header 'textual' (see doc comment for
    // Module::UsesRequiresExcludedHack).
    Role = ModuleMap::ModuleHeaderRole(Role | ModuleMap::TextualHeader);
  }

  if (LeadingToken != MMToken::HeaderKeyword) {
    // The leading keyword must be followed by 'header'.
    if (!Tok.is(MMToken::HeaderKeyword)) {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
          << (LeadingToken == MMToken::PrivateKeyword ? "private" :
              LeadingToken == MMToken::ExcludeKeyword ? "exclude" :
              LeadingToken == MMToken::TextualKeyword ? "textual" : "umbrella");
      return;
    }
    consumeToken();
  }

  // Parse the header name.
  if (!Tok.is(MMToken::StringLiteral)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
      << "header";
    HadError = true;
    return;
  }
  Module::UnresolvedHeaderDirective Header;
  Header.FileName = Tok.getString();
  Header.FileNameLoc = consumeToken();
  Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword;
  Header.Kind =
      (LeadingToken == MMToken::ExcludeKeyword ? Module::HK_Excluded
                                               : Map.headerRoleToKind(Role));

  // Check whether we already have an umbrella.
  if (Header.IsUmbrella && ActiveModule->Umbrella) {
    Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash)
      << ActiveModule->getFullModuleName();
    HadError = true;
    return;
  }

  // If we were given stat information, parse it so we can skip looking for
  // the file.
  if (Tok.is(MMToken::LBrace)) {
    SourceLocation LBraceLoc = consumeToken();

    while (!Tok.is(MMToken::RBrace) && !Tok.is(MMToken::EndOfFile)) {
      enum Attribute { Size, ModTime, Unknown };
      StringRef Str = Tok.getString();
      SourceLocation Loc = consumeToken();
      switch (llvm::StringSwitch<Attribute>(Str)
                  .Case("size", Size)
                  .Case("mtime", ModTime)
                  .Default(Unknown)) {
      case Size:
        if (Header.Size)
          Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
        if (!Tok.is(MMToken::IntegerLiteral)) {
          Diags.Report(Tok.getLocation(),
                       diag::err_mmap_invalid_header_attribute_value) << Str;
          skipUntil(MMToken::RBrace);
          break;
        }
        Header.Size = Tok.getInteger();
        consumeToken();
        break;

      case ModTime:
        if (Header.ModTime)
          Diags.Report(Loc, diag::err_mmap_duplicate_header_attribute) << Str;
        if (!Tok.is(MMToken::IntegerLiteral)) {
          Diags.Report(Tok.getLocation(),
                       diag::err_mmap_invalid_header_attribute_value) << Str;
          skipUntil(MMToken::RBrace);
          break;
        }
        Header.ModTime = Tok.getInteger();
        consumeToken();
        break;

      case Unknown:
        Diags.Report(Loc, diag::err_mmap_expected_header_attribute);
        skipUntil(MMToken::RBrace);
        break;
      }
    }

    if (Tok.is(MMToken::RBrace))
      consumeToken();
    else {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
      Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
      HadError = true;
    }
  }

  bool NeedsFramework = false;
  Map.addUnresolvedHeader(ActiveModule, std::move(Header), NeedsFramework);

  if (NeedsFramework && ActiveModule)
    Diags.Report(CurrModuleDeclLoc, diag::note_mmap_add_framework_keyword)
      << ActiveModule->getFullModuleName()
      << FixItHint::CreateReplacement(CurrModuleDeclLoc, "framework module");
}

// Ordering predicate for sorting headers by their as-written name (used to
// keep pcm output independent of directory iteration order).
static int compareModuleHeaders(const Module::Header *A,
                                const Module::Header *B) {
  return A->NameAsWritten.compare(B->NameAsWritten);
}

/// Parse an umbrella directory declaration.
///
///   umbrella-dir-declaration:
///     umbrella string-literal
void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) {
  // Parse the directory name.
  if (!Tok.is(MMToken::StringLiteral)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_header)
      << "umbrella";
    HadError = true;
    return;
  }

  std::string DirName = Tok.getString();
  SourceLocation DirNameLoc = consumeToken();

  // Check whether we already have an umbrella.
  if (ActiveModule->Umbrella) {
    Diags.Report(DirNameLoc, diag::err_mmap_umbrella_clash)
      << ActiveModule->getFullModuleName();
    HadError = true;
    return;
  }

  // Look for this file.
  const DirectoryEntry *Dir = nullptr;
  if (llvm::sys::path::is_absolute(DirName)) {
    if (auto D = SourceMgr.getFileManager().getDirectory(DirName))
      Dir = *D;
  } else {
    // Resolve relative to this module map's directory.
    SmallString<128> PathName;
    PathName = Directory->getName();
    llvm::sys::path::append(PathName, DirName);
    if (auto D = SourceMgr.getFileManager().getDirectory(PathName))
      Dir = *D;
  }

  if (!Dir) {
    Diags.Report(DirNameLoc, diag::warn_mmap_umbrella_dir_not_found)
      << DirName;
    return;
  }

  if (UsesRequiresExcludedHack.count(ActiveModule)) {
    // Mark this header 'textual' (see doc comment for
    // ModuleMapParser::UsesRequiresExcludedHack). Although iterating over the
    // directory is relatively expensive, in practice this only applies to the
    // uncommonly used Tcl module on Darwin platforms.
    std::error_code EC;
    SmallVector<Module::Header, 6> Headers;
    llvm::vfs::FileSystem &FS =
        SourceMgr.getFileManager().getVirtualFileSystem();
    for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E;
         I != E && !EC; I.increment(EC)) {
      if (auto FE = SourceMgr.getFileManager().getFile(I->path())) {
        Module::Header Header = {I->path(), *FE};
        Headers.push_back(std::move(Header));
      }
    }

    // Sort header paths so that the pcm doesn't depend on iteration order.
    llvm::array_pod_sort(Headers.begin(), Headers.end(), compareModuleHeaders);

    for (auto &Header : Headers)
      Map.addHeader(ActiveModule, std::move(Header), ModuleMap::TextualHeader);
    return;
  }

  if (Module *OwningModule = Map.UmbrellaDirs[Dir]) {
    Diags.Report(UmbrellaLoc, diag::err_mmap_umbrella_clash)
      << OwningModule->getFullModuleName();
    HadError = true;
    return;
  }

  // Record this umbrella directory.
  Map.setUmbrellaDir(ActiveModule, Dir, DirName);
}

/// Parse a module export declaration.
///
///   export-declaration:
///     'export' wildcard-module-id
///
///   wildcard-module-id:
///     identifier
///     '*'
///     identifier '.' wildcard-module-id
void ModuleMapParser::parseExportDecl() {
  assert(Tok.is(MMToken::ExportKeyword));
  SourceLocation ExportLoc = consumeToken();

  // Parse the module-id with an optional wildcard at the end.
  ModuleId ParsedModuleId;
  bool Wildcard = false;
  do {
    // FIXME: Support string-literal module names here.
    if (Tok.is(MMToken::Identifier)) {
      ParsedModuleId.push_back(std::make_pair(Tok.getString(),
                                              Tok.getLocation()));
      consumeToken();

      if (Tok.is(MMToken::Period)) {
        consumeToken();
        continue;
      }

      break;
    }

    // A '*' terminates the module-id; nothing may follow it.
    if(Tok.is(MMToken::Star)) {
      Wildcard = true;
      consumeToken();
      break;
    }

    Diags.Report(Tok.getLocation(), diag::err_mmap_module_id);
    HadError = true;
    return;
  } while (true);

  // Record the export; it stays unresolved until module resolution runs.
  Module::UnresolvedExportDecl Unresolved = {
    ExportLoc, ParsedModuleId, Wildcard
  };
  ActiveModule->UnresolvedExports.push_back(Unresolved);
}

/// Parse a module export_as declaration.
///
///   export-as-declaration:
///     'export_as' identifier
void ModuleMapParser::parseExportAsDecl() {
  assert(Tok.is(MMToken::ExportAsKeyword));
  consumeToken();

  if (!Tok.is(MMToken::Identifier)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_module_id);
    HadError = true;
    return;
  }

  // export_as is only permitted on top-level modules.
  if (ActiveModule->Parent) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_submodule_export_as);
    consumeToken();
    return;
  }

  if (!ActiveModule->ExportAsModule.empty()) {
    if (ActiveModule->ExportAsModule == Tok.getString()) {
      Diags.Report(Tok.getLocation(), diag::warn_mmap_redundant_export_as)
        << ActiveModule->Name << Tok.getString();
    } else {
      Diags.Report(Tok.getLocation(), diag::err_mmap_conflicting_export_as)
        << ActiveModule->Name << ActiveModule->ExportAsModule
        << Tok.getString();
    }
  }

  // Record the (most recent) export_as name even when a redundancy or
  // conflict was diagnosed above, and register the link dependency.
  ActiveModule->ExportAsModule = Tok.getString();
  Map.addLinkAsDependency(ActiveModule);

  consumeToken();
}

/// Parse a module use declaration.
///
///   use-declaration:
///     'use' wildcard-module-id
void ModuleMapParser::parseUseDecl() {
  assert(Tok.is(MMToken::UseKeyword));
  auto KWLoc = consumeToken();
  // Parse the module-id.
  ModuleId ParsedModuleId;
  parseModuleId(ParsedModuleId);

  // 'use' is only meaningful on top-level modules.
  if (ActiveModule->Parent)
    Diags.Report(KWLoc, diag::err_mmap_use_decl_submodule);
  else
    ActiveModule->UnresolvedDirectUses.push_back(ParsedModuleId);
}

/// Parse a link declaration.
///
///   module-declaration:
///     'link' 'framework'[opt] string-literal
void ModuleMapParser::parseLinkDecl() {
  assert(Tok.is(MMToken::LinkKeyword));
  SourceLocation LinkLoc = consumeToken();

  // Parse the optional 'framework' keyword.
  bool IsFramework = false;
  if (Tok.is(MMToken::FrameworkKeyword)) {
    consumeToken();
    IsFramework = true;
  }

  // Parse the library name
  if (!Tok.is(MMToken::StringLiteral)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_library_name)
      << IsFramework << SourceRange(LinkLoc);
    HadError = true;
    return;
  }

  std::string LibraryName = Tok.getString();
  consumeToken();
  ActiveModule->LinkLibraries.push_back(Module::LinkLibrary(LibraryName,
                                                            IsFramework));
}

/// Parse a configuration macro declaration.
///
///   module-declaration:
///     'config_macros' attributes[opt] config-macro-list?
///
///   config-macro-list:
///     identifier (',' identifier)?
void ModuleMapParser::parseConfigMacros() {
  assert(Tok.is(MMToken::ConfigMacros));
  SourceLocation ConfigMacrosLoc = consumeToken();

  // Only top-level modules can have configuration macros.
  if (ActiveModule->Parent) {
    Diags.Report(ConfigMacrosLoc, diag::err_mmap_config_macro_submodule);
  }

  // Parse the optional attributes.
  Attributes Attrs;
  if (parseOptionalAttributes(Attrs))
    return;

  if (Attrs.IsExhaustive && !ActiveModule->Parent) {
    ActiveModule->ConfigMacrosExhaustive = true;
  }

  // If we don't have an identifier, we're done.
  // FIXME: Support macros with the same name as a keyword here.
  if (!Tok.is(MMToken::Identifier))
    return;

  // Consume the first identifier. Macros are only recorded on top-level
  // modules; for submodules we still consume tokens to keep parsing in sync.
  if (!ActiveModule->Parent) {
    ActiveModule->ConfigMacros.push_back(Tok.getString().str());
  }
  consumeToken();

  do {
    // If there's a comma, consume it.
    if (!Tok.is(MMToken::Comma))
      break;
    consumeToken();

    // We expect to see a macro name here.
    // FIXME: Support macros with the same name as a keyword here.
    if (!Tok.is(MMToken::Identifier)) {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_config_macro);
      break;
    }

    // Consume the macro name.
    if (!ActiveModule->Parent) {
      ActiveModule->ConfigMacros.push_back(Tok.getString().str());
    }
    consumeToken();
  } while (true);
}

/// Format a module-id into a string.
static std::string formatModuleId(const ModuleId &Id) {
  std::string result;
  {
    // The nested scope ensures OS is destroyed (and flushed into 'result')
    // before we return.
    llvm::raw_string_ostream OS(result);

    for (unsigned I = 0, N = Id.size(); I != N; ++I) {
      if (I)
        OS << ".";
      OS << Id[I].first;
    }
  }

  return result;
}

/// Parse a conflict declaration.
///
///   module-declaration:
///     'conflict' module-id ',' string-literal
void ModuleMapParser::parseConflict() {
  assert(Tok.is(MMToken::Conflict));
  SourceLocation ConflictLoc = consumeToken();
  Module::UnresolvedConflict Conflict;

  // Parse the module-id.
  if (parseModuleId(Conflict.Id))
    return;

  // Parse the ','.
  if (!Tok.is(MMToken::Comma)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_comma)
      << SourceRange(ConflictLoc);
    return;
  }
  consumeToken();

  // Parse the message.
  if (!Tok.is(MMToken::StringLiteral)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_conflicts_message)
      << formatModuleId(Conflict.Id);
    return;
  }
  Conflict.Message = Tok.getString().str();
  consumeToken();

  // Add this unresolved conflict.
  ActiveModule->UnresolvedConflicts.push_back(Conflict);
}

/// Parse an inferred module declaration (wildcard modules).
///
///   module-declaration:
///     'explicit'[opt] 'framework'[opt] 'module' * attributes[opt]
///       { inferred-module-member* }
///
///   inferred-module-member:
///     'export' '*'
///     'exclude' identifier
void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) {
  assert(Tok.is(MMToken::Star));
  SourceLocation StarLoc = consumeToken();
  bool Failed = false;

  // Inferred modules must be submodules.
  if (!ActiveModule && !Framework) {
    Diags.Report(StarLoc, diag::err_mmap_top_level_inferred_submodule);
    Failed = true;
  }

  if (ActiveModule) {
    // Inferred modules must have umbrella directories.
    if (!Failed && ActiveModule->IsAvailable &&
        !ActiveModule->getUmbrellaDir()) {
      Diags.Report(StarLoc, diag::err_mmap_inferred_no_umbrella);
      Failed = true;
    }

    // Check for redefinition of an inferred module.
    if (!Failed && ActiveModule->InferSubmodules) {
      Diags.Report(StarLoc, diag::err_mmap_inferred_redef);
      if (ActiveModule->InferredSubmoduleLoc.isValid())
        Diags.Report(ActiveModule->InferredSubmoduleLoc,
                     diag::note_mmap_prev_definition);
      Failed = true;
    }

    // Check for the 'framework' keyword, which is not permitted here.
    if (Framework) {
      Diags.Report(StarLoc, diag::err_mmap_inferred_framework_submodule);
      Framework = false;
    }
  } else if (Explicit) {
    Diags.Report(StarLoc, diag::err_mmap_explicit_inferred_framework);
    Explicit = false;
  }

  // If there were any problems with this inferred submodule, skip its body.
  if (Failed) {
    if (Tok.is(MMToken::LBrace)) {
      consumeToken();
      skipUntil(MMToken::RBrace);
      if (Tok.is(MMToken::RBrace))
        consumeToken();
    }
    HadError = true;
    return;
  }

  // Parse optional attributes.
  Attributes Attrs;
  if (parseOptionalAttributes(Attrs))
    return;

  if (ActiveModule) {
    // Note that we have an inferred submodule.
    ActiveModule->InferSubmodules = true;
    ActiveModule->InferredSubmoduleLoc = StarLoc;
    ActiveModule->InferExplicitSubmodules = Explicit;
  } else {
    // We'll be inferring framework modules for this directory.
    Map.InferredDirectories[Directory].InferModules = true;
    Map.InferredDirectories[Directory].Attrs = Attrs;
    Map.InferredDirectories[Directory].ModuleMapFile = ModuleMapFile;
    // FIXME: Handle the 'framework' keyword.
  }

  // Parse the opening brace.
  if (!Tok.is(MMToken::LBrace)) {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_lbrace_wildcard);
    HadError = true;
    return;
  }
  SourceLocation LBraceLoc = consumeToken();

  // Parse the body of the inferred submodule.
  bool Done = false;
  do {
    switch (Tok.Kind) {
    case MMToken::EndOfFile:
    case MMToken::RBrace:
      Done = true;
      break;

    case MMToken::ExcludeKeyword:
      // 'exclude' is only valid when inferring framework modules for a
      // directory (i.e. no ActiveModule).
      if (ActiveModule) {
        Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
          << (ActiveModule != nullptr);
        consumeToken();
        break;
      }

      consumeToken();
      // FIXME: Support string-literal module names here.
      if (!Tok.is(MMToken::Identifier)) {
        Diags.Report(Tok.getLocation(), diag::err_mmap_missing_exclude_name);
        break;
      }

      Map.InferredDirectories[Directory].ExcludedModules
        .push_back(Tok.getString());
      consumeToken();
      break;

    case MMToken::ExportKeyword:
      // Conversely, 'export' is only valid inside an inferred submodule.
      if (!ActiveModule) {
        Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
          << (ActiveModule != nullptr);
        consumeToken();
        break;
      }

      consumeToken();
      if (Tok.is(MMToken::Star))
        ActiveModule->InferExportWildcard = true;
      else
        Diags.Report(Tok.getLocation(),
                     diag::err_mmap_expected_export_wildcard);
      consumeToken();
      break;

    case MMToken::ExplicitKeyword:
    case MMToken::ModuleKeyword:
    case MMToken::HeaderKeyword:
    case MMToken::PrivateKeyword:
    case MMToken::UmbrellaKeyword:
    default:
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_inferred_member)
          << (ActiveModule != nullptr);
      consumeToken();
      break;
    }
  } while (!Done);

  if (Tok.is(MMToken::RBrace))
    consumeToken();
  else {
    Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rbrace);
    Diags.Report(LBraceLoc, diag::note_mmap_lbrace_match);
    HadError = true;
  }
}

/// Parse optional attributes.
///
///   attributes:
///     attribute attributes
///     attribute
///
///   attribute:
///     [ identifier ]
///
/// \param Attrs Will be filled in with the parsed attributes.
///
/// \returns true if an error occurred, false otherwise.
bool ModuleMapParser::parseOptionalAttributes(Attributes &Attrs) {
  bool HadError = false;

  while (Tok.is(MMToken::LSquare)) {
    // Consume the '['.
    SourceLocation LSquareLoc = consumeToken();

    // Check whether we have an attribute name here.
    if (!Tok.is(MMToken::Identifier)) {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_attribute);
      skipUntil(MMToken::RSquare);
      if (Tok.is(MMToken::RSquare))
        consumeToken();
      HadError = true;
    }

    // Decode the attribute name.
    // NOTE(review): if the branch above already skipped past ']', Tok here is
    // the token after the attribute list and the decode below consumes one
    // extra token — confirm this is intended.
    AttributeKind Attribute
      = llvm::StringSwitch<AttributeKind>(Tok.getString())
          .Case("exhaustive", AT_exhaustive)
          .Case("extern_c", AT_extern_c)
          .Case("no_undeclared_includes", AT_no_undeclared_includes)
          .Case("system", AT_system)
          .Default(AT_unknown);
    switch (Attribute) {
    case AT_unknown:
      Diags.Report(Tok.getLocation(), diag::warn_mmap_unknown_attribute)
        << Tok.getString();
      break;

    case AT_system:
      Attrs.IsSystem = true;
      break;

    case AT_extern_c:
      Attrs.IsExternC = true;
      break;

    case AT_exhaustive:
      Attrs.IsExhaustive = true;
      break;

    case AT_no_undeclared_includes:
      Attrs.NoUndeclaredIncludes = true;
      break;
    }
    consumeToken();

    // Consume the ']'.
    if (!Tok.is(MMToken::RSquare)) {
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_rsquare);
      Diags.Report(LSquareLoc, diag::note_mmap_lsquare_match);
      skipUntil(MMToken::RSquare);
      HadError = true;
    }

    if (Tok.is(MMToken::RSquare))
      consumeToken();
  }

  return HadError;
}

/// Parse a module map file.
///
///   module-map-file:
///     module-declaration*
bool ModuleMapParser::parseModuleMapFile() {
  do {
    switch (Tok.Kind) {
    case MMToken::EndOfFile:
      return HadError;

    case MMToken::ExplicitKeyword:
    case MMToken::ExternKeyword:
    case MMToken::ModuleKeyword:
    case MMToken::FrameworkKeyword:
      parseModuleDecl();
      break;

    // Any other token at the top level is an error; diagnose and skip it.
    case MMToken::Comma:
    case MMToken::ConfigMacros:
    case MMToken::Conflict:
    case MMToken::Exclaim:
    case MMToken::ExcludeKeyword:
    case MMToken::ExportKeyword:
    case MMToken::ExportAsKeyword:
    case MMToken::HeaderKeyword:
    case MMToken::Identifier:
    case MMToken::LBrace:
    case MMToken::LinkKeyword:
    case MMToken::LSquare:
    case MMToken::Period:
    case MMToken::PrivateKeyword:
    case MMToken::RBrace:
    case MMToken::RSquare:
    case MMToken::RequiresKeyword:
    case MMToken::Star:
    case MMToken::StringLiteral:
    case MMToken::IntegerLiteral:
    case MMToken::TextualKeyword:
    case MMToken::UmbrellaKeyword:
    case MMToken::UseKeyword:
      Diags.Report(Tok.getLocation(), diag::err_mmap_expected_module);
      HadError = true;
      consumeToken();
      break;
    }
  } while (true);
}

bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem,
                                   const DirectoryEntry *Dir, FileID ID,
                                   unsigned *Offset,
                                   SourceLocation ExternModuleLoc) {
  assert(Target && "Missing target information");
  // Each module map file is parsed at most once; the cached value records
  // whether that parse reported an error.
  llvm::DenseMap<const FileEntry *, bool>::iterator Known
    = ParsedModuleMap.find(File);
  if (Known != ParsedModuleMap.end())
    return Known->second;

  // If the module map file wasn't already entered, do so now.
  if (ID.isInvalid()) {
    auto FileCharacter =
        IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
    ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter);
  }

  assert(Target && "Missing target information");
  const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(ID);
  if (!Buffer)
    return ParsedModuleMap[File] = true;
  assert((!Offset || *Offset <= Buffer->getBufferSize()) &&
         "invalid buffer offset");

  // Parse this module map file. When an Offset is provided, lexing resumes
  // from that position in the buffer.
  Lexer L(SourceMgr.getLocForStartOfFile(ID), MMapLangOpts,
          Buffer->getBufferStart(),
          Buffer->getBufferStart() + (Offset ? *Offset : 0),
          Buffer->getBufferEnd());
  SourceLocation Start = L.getSourceLocation();
  ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir,
                         IsSystem);
  bool Result = Parser.parseModuleMapFile();
  ParsedModuleMap[File] = Result;

  // Report back where parsing stopped so the caller can resume from there.
  if (Offset) {
    auto Loc = SourceMgr.getDecomposedLoc(Parser.getLocation());
    assert(Loc.first == ID && "stopped in a different file?");
    *Offset = Loc.second;
  }

  // Notify callbacks that we parsed it.
  for (const auto &Cb : Callbacks)
    Cb->moduleMapFileRead(Start, *File, IsSystem);

  return Result;
}
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
new file mode 100644
index 000000000000..31548d246d5a
--- /dev/null
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -0,0 +1,163 @@
//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements pieces of the Preprocessor interface that manage the
// caching of lexed tokens.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
using namespace clang;

// EnableBacktrackAtThisPos - From the point that this method is called, and
// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
// keeps track of the lexed tokens so that a subsequent Backtrack() call will
// make the Preprocessor re-lex the same tokens.
//
// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
// be combined with the EnableBacktrackAtThisPos calls in reverse order.
void Preprocessor::EnableBacktrackAtThisPos() {
  assert(LexLevel == 0 && "cannot use lookahead while lexing");
  BacktrackPositions.push_back(CachedLexPos);
  EnterCachingLexMode();
}

// Disable the last EnableBacktrackAtThisPos call.
void Preprocessor::CommitBacktrackedTokens() {
  assert(!BacktrackPositions.empty()
         && "EnableBacktrackAtThisPos was not called!");
  BacktrackPositions.pop_back();
}

// Make Preprocessor re-lex the tokens that were lexed since
// EnableBacktrackAtThisPos() was previously called.
void Preprocessor::Backtrack() {
  assert(!BacktrackPositions.empty()
         && "EnableBacktrackAtThisPos was not called!");
  CachedLexPos = BacktrackPositions.back();
  BacktrackPositions.pop_back();
  recomputeCurLexerKind();
}

void Preprocessor::CachingLex(Token &Result) {
  if (!InCachingLexMode())
    return;

  // The assert in EnterCachingLexMode should prevent this from happening.
  assert(LexLevel == 1 &&
         "should not use token caching within the preprocessor");

  // Replay a previously cached token if one is available.
  if (CachedLexPos < CachedTokens.size()) {
    Result = CachedTokens[CachedLexPos++];
    Result.setFlag(Token::IsReinjected);
    return;
  }

  ExitCachingLexMode();
  Lex(Result);

  if (isBacktrackEnabled()) {
    // Cache the lexed token.
    EnterCachingLexModeUnchecked();
    CachedTokens.push_back(Result);
    ++CachedLexPos;
    return;
  }

  if (CachedLexPos < CachedTokens.size()) {
    EnterCachingLexModeUnchecked();
  } else {
    // All cached tokens were consumed.
    CachedTokens.clear();
    CachedLexPos = 0;
  }
}

void Preprocessor::EnterCachingLexMode() {
  // The caching layer sits on top of all the other lexers, so it's incorrect
  // to cache tokens while inside a nested lex action. The cached tokens would
  // be retained after returning to the enclosing lex action and, at best,
  // would appear at the wrong position in the token stream.
  assert(LexLevel == 0 &&
         "entered caching lex mode while lexing something else");

  if (InCachingLexMode()) {
    assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
    return;
  }

  EnterCachingLexModeUnchecked();
}

void Preprocessor::EnterCachingLexModeUnchecked() {
  assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
  PushIncludeMacroStack();
  CurLexerKind = CLK_CachingLexer;
}


const Token &Preprocessor::PeekAhead(unsigned N) {
  assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
  // Lex and cache enough additional tokens to reach the N-th lookahead token;
  // the last token lexed is the one requested.
  ExitCachingLexMode();
  for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
    CachedTokens.push_back(Token());
    Lex(CachedTokens.back());
  }
  EnterCachingLexMode();
  return CachedTokens.back();
}

void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
  assert(Tok.isAnnotation() && "Expected annotation token");
  assert(CachedLexPos != 0 && "Expected to have some cached tokens");
  assert(CachedTokens[CachedLexPos-1].getLastLoc() == Tok.getAnnotationEndLoc()
         && "The annotation should be until the most recent cached token");

  // Start from the end of the cached tokens list and look for the token
  // that is the beginning of the annotation token.
  for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
    CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
    if (AnnotBegin->getLocation() == Tok.getLocation()) {
      assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) &&
             "The backtrack pos points inside the annotated tokens!");
      // Replace the cached tokens with the single annotation token.
      if (i < CachedLexPos)
        CachedTokens.erase(AnnotBegin + 1, CachedTokens.begin() + CachedLexPos);
      *AnnotBegin = Tok;
      CachedLexPos = i;
      return;
    }
  }
}

bool Preprocessor::IsPreviousCachedToken(const Token &Tok) const {
  // There's currently no cached token...
  if (!CachedLexPos)
    return false;

  const Token LastCachedTok = CachedTokens[CachedLexPos - 1];
  if (LastCachedTok.getKind() != Tok.getKind())
    return false;

  // The locations must coincide: same SLoc address space and zero relative
  // offset between the two tokens.
  int RelOffset = 0;
  if ((!getSourceManager().isInSameSLocAddrSpace(
          Tok.getLocation(), getLastCachedTokenLocation(), &RelOffset)) ||
      RelOffset)
    return false;

  return true;
}

void Preprocessor::ReplacePreviousCachedToken(ArrayRef<Token> NewToks) {
  assert(CachedLexPos != 0 && "Expected to have some cached tokens");
  // Insert the replacement tokens before the last cached token, then drop the
  // token being replaced; the net size change is NewToks.size() - 1.
  CachedTokens.insert(CachedTokens.begin() + CachedLexPos - 1, NewToks.begin(),
                      NewToks.end());
  CachedTokens.erase(CachedTokens.begin() + CachedLexPos - 1 + NewToks.size());
  CachedLexPos += NewToks.size() - 1;
}
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
new file mode 100644
index 000000000000..cd8b04b20d24
--- /dev/null
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -0,0 +1,13 @@
//===--- PPCallbacks.cpp - Callbacks for Preprocessor actions ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/PPCallbacks.h"

using namespace clang;

// Out-of-line virtual method to anchor the class's vtable to this file.
void PPChainedCallbacks::anchor() { }
diff --git a/clang/lib/Lex/PPConditionalDirectiveRecord.cpp b/clang/lib/Lex/PPConditionalDirectiveRecord.cpp
new file mode 100644
index 000000000000..facee28007c7
--- /dev/null
+++ b/clang/lib/Lex/PPConditionalDirectiveRecord.cpp
@@ -0,0 +1,119 @@
//===--- PPConditionalDirectiveRecord.cpp - Preprocessing Directives-*- C++ -*=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements the PPConditionalDirectiveRecord class, which maintains
//  a record of conditional directive regions.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/PPConditionalDirectiveRecord.h"
#include "llvm/Support/Capacity.h"

using namespace clang;

PPConditionalDirectiveRecord::PPConditionalDirectiveRecord(SourceManager &SM)
  : SourceMgr(SM) {
  // Seed the stack with a null location: the sentinel region that represents
  // "outside any conditional directive".
  CondDirectiveStack.push_back(SourceLocation());
}

bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective(
                                                      SourceRange Range) const {
  if (Range.isInvalid())
    return false;

  // Find the first recorded directive at or after the range's start.
  CondDirectiveLocsTy::const_iterator low = llvm::lower_bound(
      CondDirectiveLocs, Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr));
  if (low == CondDirectiveLocs.end())
    return false;

  if (SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), low->getLoc()))
    return false;

  // The range intersects a directive iff its endpoints fall in different
  // conditional regions.
  CondDirectiveLocsTy::const_iterator
    upp = std::upper_bound(low, CondDirectiveLocs.end(),
                           Range.getEnd(), CondDirectiveLoc::Comp(SourceMgr));
  SourceLocation uppRegion;
  if (upp != CondDirectiveLocs.end())
    uppRegion = upp->getRegionLoc();

  return low->getRegionLoc() != uppRegion;
}

SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc(
                                                     SourceLocation Loc) const {
  if (Loc.isInvalid())
    return SourceLocation();
  if (CondDirectiveLocs.empty())
    return SourceLocation();

  // Past the last recorded directive, the current (innermost) region applies.
  if (SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
                                          Loc))
    return CondDirectiveStack.back();

  CondDirectiveLocsTy::const_iterator low = llvm::lower_bound(
      CondDirectiveLocs, Loc, CondDirectiveLoc::Comp(SourceMgr));
  assert(low != CondDirectiveLocs.end());
  return low->getRegionLoc();
}

void PPConditionalDirectiveRecord::addCondDirectiveLoc(
                                                      CondDirectiveLoc DirLoc) {
  // Ignore directives in system headers.
  if (SourceMgr.isInSystemHeader(DirLoc.getLoc()))
    return;

  // Directives must be recorded in translation-unit order.
  assert(CondDirectiveLocs.empty() ||
         SourceMgr.isBeforeInTranslationUnit(CondDirectiveLocs.back().getLoc(),
                                             DirLoc.getLoc()));
  CondDirectiveLocs.push_back(DirLoc);
}

void PPConditionalDirectiveRecord::If(SourceLocation Loc,
                                      SourceRange ConditionRange,
                                      ConditionValueKind ConditionValue) {
  // #if opens a nested region: push it.
  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
  CondDirectiveStack.push_back(Loc);
}

void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc,
                                         const Token &MacroNameTok,
                                         const MacroDefinition &MD) {
  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
  CondDirectiveStack.push_back(Loc);
}

void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc,
                                          const Token &MacroNameTok,
                                          const MacroDefinition &MD) {
  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
  CondDirectiveStack.push_back(Loc);
}

void PPConditionalDirectiveRecord::Elif(SourceLocation Loc,
                                        SourceRange ConditionRange,
                                        ConditionValueKind ConditionValue,
                                        SourceLocation IfLoc) {
  // #elif ends the previous region and starts a new one at the same nesting
  // depth, so replace the top of the stack rather than pushing.
  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
  CondDirectiveStack.back() = Loc;
}

void PPConditionalDirectiveRecord::Else(SourceLocation Loc,
                                        SourceLocation IfLoc) {
  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
  CondDirectiveStack.back() = Loc;
}

void PPConditionalDirectiveRecord::Endif(SourceLocation Loc,
                            SourceLocation IfLoc) { +  addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back())); +  assert(!CondDirectiveStack.empty()); +  CondDirectiveStack.pop_back(); +} + +size_t PPConditionalDirectiveRecord::getTotalMemory() const { +  return llvm::capacity_in_bytes(CondDirectiveLocs); +} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp new file mode 100644 index 000000000000..3b7eaee3c914 --- /dev/null +++ b/clang/lib/Lex/PPDirectives.cpp @@ -0,0 +1,3084 @@ +//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements # directive processing for the Preprocessor. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/ModuleMap.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Lex/Token.h" +#include "clang/Lex/VariadicMacroSupport.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" 
+#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" +#include <algorithm> +#include <cassert> +#include <cstring> +#include <new> +#include <string> +#include <utility> + +using namespace clang; + +//===----------------------------------------------------------------------===// +// Utility Methods for Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { +  auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead}; +  MIChainHead = MIChain; +  return &MIChain->MI; +} + +DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, +                                                           SourceLocation Loc) { +  return new (BP) DefMacroDirective(MI, Loc); +} + +UndefMacroDirective * +Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) { +  return new (BP) UndefMacroDirective(UndefLoc); +} + +VisibilityMacroDirective * +Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, +                                               bool isPublic) { +  return new (BP) VisibilityMacroDirective(Loc, isPublic); +} + +/// Read and discard all tokens remaining on the current line until +/// the tok::eod token is found. +SourceRange Preprocessor::DiscardUntilEndOfDirective() { +  Token Tmp; +  SourceRange Res; + +  LexUnexpandedToken(Tmp); +  Res.setBegin(Tmp.getLocation()); +  while (Tmp.isNot(tok::eod)) { +    assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); +    LexUnexpandedToken(Tmp); +  } +  Res.setEnd(Tmp.getLocation()); +  return Res; +} + +/// Enumerates possible cases of #define/#undef a reserved identifier. 
+enum MacroDiag {
+  MD_NoWarn,        //> Not a reserved identifier
+  MD_KeywordDef,    //> Macro hides keyword, enabled by default
+  MD_ReservedMacro  //> #define or #undef reserved id, disabled by default
+};
+
+/// Checks if the specified identifier is reserved in the specified
+/// language.
+/// This function does not check if the identifier is a keyword.
+static bool isReservedId(StringRef Text, const LangOptions &Lang) {
+  // C++ [macro.names], C11 7.1.3:
+  // All identifiers that begin with an underscore and either an uppercase
+  // letter or another underscore are always reserved for any use.
+  if (Text.size() >= 2 && Text[0] == '_' &&
+      (isUppercase(Text[1]) || Text[1] == '_'))
+      return true;
+  // C++ [global.names]
+  // Each name that contains a double underscore ... is reserved to the
+  // implementation for any use.
+  if (Lang.CPlusPlus) {
+    if (Text.find("__") != StringRef::npos)
+      return true;
+  }
+  return false;
+}
+
+// The -fmodule-name option tells the compiler to textually include headers in
+// the specified module, meaning clang won't build the specified module. This is
+// useful in a number of situations, for instance, when building a library that
+// vends a module map, one might want to avoid hitting intermediate build
+// products containing the module map or avoid finding the system installed
+// modulemap for that library.
+static bool isForModuleBuilding(Module *M, StringRef CurrentModule,
+                                StringRef ModuleName) {
+  StringRef TopLevelName = M->getTopLevelModuleName();
+
+  // When building framework Foo, we want to make sure that Foo *and* Foo_Private
+  // are textually included and no modules are built for both.
+  if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName && +      !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private")) +    TopLevelName = TopLevelName.drop_back(8); + +  return TopLevelName == CurrentModule; +} + +static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { +  const LangOptions &Lang = PP.getLangOpts(); +  StringRef Text = II->getName(); +  if (isReservedId(Text, Lang)) +    return MD_ReservedMacro; +  if (II->isKeyword(Lang)) +    return MD_KeywordDef; +  if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final"))) +    return MD_KeywordDef; +  return MD_NoWarn; +} + +static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) { +  const LangOptions &Lang = PP.getLangOpts(); +  StringRef Text = II->getName(); +  // Do not warn on keyword undef.  It is generally harmless and widely used. +  if (isReservedId(Text, Lang)) +    return MD_ReservedMacro; +  return MD_NoWarn; +} + +// Return true if we want to issue a diagnostic by default if we +// encounter this name in a #include with the wrong case. For now, +// this includes the standard C and C++ headers, Posix headers, +// and Boost headers. Improper case for these #includes is a +// potential portability issue. +static bool warnByDefaultOnWrongCase(StringRef Include) { +  // If the first component of the path is "boost", treat this like a standard header +  // for the purposes of diagnostics. +  if (::llvm::sys::path::begin(Include)->equals_lower("boost")) +    return true; + +  // "condition_variable" is the longest standard header name at 18 characters. +  // If the include file name is longer than that, it can't be a standard header. +  static const size_t MaxStdHeaderNameLen = 18u; +  if (Include.size() > MaxStdHeaderNameLen) +    return false; + +  // Lowercase and normalize the search string. +  SmallString<32> LowerInclude{Include}; +  for (char &Ch : LowerInclude) { +    // In the ASCII range? 
+    if (static_cast<unsigned char>(Ch) > 0x7f) +      return false; // Can't be a standard header +    // ASCII lowercase: +    if (Ch >= 'A' && Ch <= 'Z') +      Ch += 'a' - 'A'; +    // Normalize path separators for comparison purposes. +    else if (::llvm::sys::path::is_separator(Ch)) +      Ch = '/'; +  } + +  // The standard C/C++ and Posix headers +  return llvm::StringSwitch<bool>(LowerInclude) +    // C library headers +    .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true) +    .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true) +    .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true) +    .Cases("stdatomic.h", "stdbool.h", "stddef.h", "stdint.h", "stdio.h", true) +    .Cases("stdlib.h", "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", true) +    .Cases("time.h", "uchar.h", "wchar.h", "wctype.h", true) + +    // C++ headers for C library facilities +    .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true) +    .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true) +    .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true) +    .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true) +    .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true) +    .Case("cwctype", true) + +    // C++ library headers +    .Cases("algorithm", "fstream", "list", "regex", "thread", true) +    .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true) +    .Cases("atomic", "future", "map", "set", "type_traits", true) +    .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true) +    .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true) +    .Cases("codecvt", "ios", "new", "stack", "unordered_map", true) +    .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true) +    .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true) +    .Cases("deque", "istream", 
"queue", "string", "valarray", true) +    .Cases("exception", "iterator", "random", "strstream", "vector", true) +    .Cases("forward_list", "limits", "ratio", "system_error", true) + +    // POSIX headers (which aren't also C headers) +    .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true) +    .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true) +    .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true) +    .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true) +    .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true) +    .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true) +    .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true) +    .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true) +    .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true) +    .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true) +    .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true) +    .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true) +    .Default(false); +} + +bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, +                                  bool *ShadowFlag) { +  // Missing macro name? +  if (MacroNameTok.is(tok::eod)) +    return Diag(MacroNameTok, diag::err_pp_missing_macro_name); + +  IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); +  if (!II) +    return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + +  if (II->isCPlusPlusOperatorKeyword()) { +    // C++ 2.5p2: Alternative tokens behave the same as its primary token +    // except for their spellings. +    Diag(MacroNameTok, getLangOpts().MicrosoftExt +                           ? 
diag::ext_pp_operator_used_as_macro_name +                           : diag::err_pp_operator_used_as_macro_name) +        << II << MacroNameTok.getKind(); +    // Allow #defining |and| and friends for Microsoft compatibility or +    // recovery when legacy C headers are included in C++. +  } + +  if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) { +    // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4. +    return Diag(MacroNameTok, diag::err_defined_macro_name); +  } + +  if (isDefineUndef == MU_Undef) { +    auto *MI = getMacroInfo(II); +    if (MI && MI->isBuiltinMacro()) { +      // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 +      // and C++ [cpp.predefined]p4], but allow it as an extension. +      Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro); +    } +  } + +  // If defining/undefining reserved identifier or a keyword, we need to issue +  // a warning. +  SourceLocation MacroNameLoc = MacroNameTok.getLocation(); +  if (ShadowFlag) +    *ShadowFlag = false; +  if (!SourceMgr.isInSystemHeader(MacroNameLoc) && +      (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) { +    MacroDiag D = MD_NoWarn; +    if (isDefineUndef == MU_Define) { +      D = shouldWarnOnMacroDef(*this, II); +    } +    else if (isDefineUndef == MU_Undef) +      D = shouldWarnOnMacroUndef(*this, II); +    if (D == MD_KeywordDef) { +      // We do not want to warn on some patterns widely used in configuration +      // scripts.  This requires analyzing next tokens, so do not issue warnings +      // now, only inform caller. +      if (ShadowFlag) +        *ShadowFlag = true; +    } +    if (D == MD_ReservedMacro) +      Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id); +  } + +  // Okay, we got a good identifier. +  return false; +} + +/// Lex and validate a macro name, which occurs after a +/// \#define or \#undef. 
+/// +/// This sets the token kind to eod and discards the rest of the macro line if +/// the macro name is invalid. +/// +/// \param MacroNameTok Token that is expected to be a macro name. +/// \param isDefineUndef Context in which macro is used. +/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword. +void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, +                                 bool *ShadowFlag) { +  // Read the token, don't allow macro expansion on it. +  LexUnexpandedToken(MacroNameTok); + +  if (MacroNameTok.is(tok::code_completion)) { +    if (CodeComplete) +      CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define); +    setCodeCompletionReached(); +    LexUnexpandedToken(MacroNameTok); +  } + +  if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag)) +    return; + +  // Invalid macro name, read and discard the rest of the line and set the +  // token kind to tok::eod if necessary. +  if (MacroNameTok.isNot(tok::eod)) { +    MacroNameTok.setKind(tok::eod); +    DiscardUntilEndOfDirective(); +  } +} + +/// Ensure that the next token is a tok::eod token. +/// +/// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is +/// true, then we consider macros that expand to zero tokens as being ok. +/// +/// Returns the location of the end of the directive. +SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, +                                                 bool EnableMacros) { +  Token Tmp; +  // Lex unexpanded tokens for most directives: macros might expand to zero +  // tokens, causing us to miss diagnosing invalid lines.  Some directives (like +  // #line) allow empty macros. +  if (EnableMacros) +    Lex(Tmp); +  else +    LexUnexpandedToken(Tmp); + +  // There should be no tokens after the directive, but we allow them as an +  // extension. +  while (Tmp.is(tok::comment))  // Skip comments in -C mode. 
+    LexUnexpandedToken(Tmp); + +  if (Tmp.is(tok::eod)) +    return Tmp.getLocation(); + +  // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89, +  // or if this is a macro-style preprocessing directive, because it is more +  // trouble than it is worth to insert /**/ and check that there is no /**/ +  // in the range also. +  FixItHint Hint; +  if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && +      !CurTokenLexer) +    Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); +  Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; +  return DiscardUntilEndOfDirective().getEnd(); +} + +Optional<unsigned> Preprocessor::getSkippedRangeForExcludedConditionalBlock( +    SourceLocation HashLoc) { +  if (!ExcludedConditionalDirectiveSkipMappings) +    return None; +  if (!HashLoc.isFileID()) +    return None; + +  std::pair<FileID, unsigned> HashFileOffset = +      SourceMgr.getDecomposedLoc(HashLoc); +  const llvm::MemoryBuffer *Buf = SourceMgr.getBuffer(HashFileOffset.first); +  auto It = ExcludedConditionalDirectiveSkipMappings->find(Buf); +  if (It == ExcludedConditionalDirectiveSkipMappings->end()) +    return None; + +  const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond(); +  // Check if the offset of '#' is mapped in the skipped ranges. +  auto MappingIt = SkippedRanges.find(HashFileOffset.second); +  if (MappingIt == SkippedRanges.end()) +    return None; + +  unsigned BytesToSkip = MappingIt->getSecond(); +  unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset(); +  assert(CurLexerBufferOffset >= HashFileOffset.second && +         "lexer is before the hash?"); +  // Take into account the fact that the lexer has already advanced, so the +  // number of bytes to skip must be adjusted. 
+  unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second; +  assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?"); +  return BytesToSkip - LengthDiff; +} + +/// SkipExcludedConditionalBlock - We just read a \#if or related directive and +/// decided that the subsequent tokens are in the \#if'd out portion of the +/// file.  Lex the rest of the file, until we see an \#endif.  If +/// FoundNonSkipPortion is true, then we have already emitted code for part of +/// this \#if directive, so \#else/\#elif blocks should never be entered. +/// If ElseOk is true, then \#else directives are ok, if not, then we have +/// already seen one so a \#else directive is a duplicate.  When this returns, +/// the caller can lex the first valid token. +void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, +                                                SourceLocation IfTokenLoc, +                                                bool FoundNonSkipPortion, +                                                bool FoundElse, +                                                SourceLocation ElseLoc) { +  ++NumSkipped; +  assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?"); + +  if (PreambleConditionalStack.reachedEOFWhileSkipping()) +    PreambleConditionalStack.clearSkipInfo(); +  else +    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false, +                                     FoundNonSkipPortion, FoundElse); + +  // Enter raw mode to disable identifier lookup (and thus macro expansion), +  // disabling warnings, etc. +  CurPPLexer->LexingRawMode = true; +  Token Tok; +  if (auto SkipLength = +          getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) { +    // Skip to the next '#endif' / '#else' / '#elif'. 
+    CurLexer->skipOver(*SkipLength);
+  }
+  while (true) {
+    CurLexer->Lex(Tok);
+
+    if (Tok.is(tok::code_completion)) {
+      if (CodeComplete)
+        CodeComplete->CodeCompleteInConditionalExclusion();
+      setCodeCompletionReached();
+      continue;
+    }
+
+    // If this is the end of the buffer, we have an error.
+    if (Tok.is(tok::eof)) {
+      // We don't emit errors for unterminated conditionals here,
+      // Lexer::LexEndOfFile can do that properly.
+      // Just return and let the caller lex after this #include.
+      if (PreambleConditionalStack.isRecording())
+        PreambleConditionalStack.SkipInfo.emplace(
+            HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
+      break;
+    }
+
+    // If this token is not a preprocessor directive, just skip it.
+    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+      continue;
+
+    // We just parsed a # character at the start of a line, so we're in
+    // directive mode.  Tell the lexer this so any newlines we see will be
+    // converted into an EOD token (this terminates the macro).
+    CurPPLexer->ParsingPreprocessorDirective = true;
+    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
+
+
+    // Read the next token, the directive flavor.
+    LexUnexpandedToken(Tok);
+
+    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
+    // something bogus), skip it.
+    if (Tok.isNot(tok::raw_identifier)) {
+      CurPPLexer->ParsingPreprocessorDirective = false;
+      // Restore comment saving mode.
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
+      continue;
+    }
+
+    // If the first letter isn't i or e, it isn't interesting to us.  We know that
+    // this is safe in the face of spelling differences, because there is no way
+    // to spell an i/e in a strange way that is another letter.  
Skipping this +    // allows us to avoid looking up the identifier info for #define/#undef and +    // other common directives. +    StringRef RI = Tok.getRawIdentifier(); + +    char FirstChar = RI[0]; +    if (FirstChar >= 'a' && FirstChar <= 'z' && +        FirstChar != 'i' && FirstChar != 'e') { +      CurPPLexer->ParsingPreprocessorDirective = false; +      // Restore comment saving mode. +      if (CurLexer) CurLexer->resetExtendedTokenMode(); +      continue; +    } + +    // Get the identifier name without trigraphs or embedded newlines.  Note +    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled +    // when skipping. +    char DirectiveBuf[20]; +    StringRef Directive; +    if (!Tok.needsCleaning() && RI.size() < 20) { +      Directive = RI; +    } else { +      std::string DirectiveStr = getSpelling(Tok); +      size_t IdLen = DirectiveStr.size(); +      if (IdLen >= 20) { +        CurPPLexer->ParsingPreprocessorDirective = false; +        // Restore comment saving mode. +        if (CurLexer) CurLexer->resetExtendedTokenMode(); +        continue; +      } +      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen); +      Directive = StringRef(DirectiveBuf, IdLen); +    } + +    if (Directive.startswith("if")) { +      StringRef Sub = Directive.substr(2); +      if (Sub.empty() ||   // "if" +          Sub == "def" ||   // "ifdef" +          Sub == "ndef") {  // "ifndef" +        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't +        // bother parsing the condition. 
+        DiscardUntilEndOfDirective(); +        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, +                                       /*foundnonskip*/false, +                                       /*foundelse*/false); +      } +    } else if (Directive[0] == 'e') { +      StringRef Sub = Directive.substr(1); +      if (Sub == "ndif") {  // "endif" +        PPConditionalInfo CondInfo; +        CondInfo.WasSkipping = true; // Silence bogus warning. +        bool InCond = CurPPLexer->popConditionalLevel(CondInfo); +        (void)InCond;  // Silence warning in no-asserts mode. +        assert(!InCond && "Can't be skipping if not in a conditional!"); + +        // If we popped the outermost skipping block, we're done skipping! +        if (!CondInfo.WasSkipping) { +          // Restore the value of LexingRawMode so that trailing comments +          // are handled correctly, if we've reached the outermost block. +          CurPPLexer->LexingRawMode = false; +          CheckEndOfDirective("endif"); +          CurPPLexer->LexingRawMode = true; +          if (Callbacks) +            Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc); +          break; +        } else { +          DiscardUntilEndOfDirective(); +        } +      } else if (Sub == "lse") { // "else". +        // #else directive in a skipping conditional.  If not in some other +        // skipping conditional, and if #else hasn't already been seen, enter it +        // as a non-skipping conditional. +        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel(); + +        // If this is a #else with a #else before it, report the error. +        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else); + +        // Note that we've seen a #else in this conditional. +        CondInfo.FoundElse = true; + +        // If the conditional is at the top level, and the #if block wasn't +        // entered, enter the #else block now. 
+        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
+          CondInfo.FoundNonSkip = true;
+          // Restore the value of LexingRawMode so that trailing comments
+          // are handled correctly.
+          CurPPLexer->LexingRawMode = false;
+          CheckEndOfDirective("else");
+          CurPPLexer->LexingRawMode = true;
+          if (Callbacks)
+            Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
+          break;
+        } else {
+          DiscardUntilEndOfDirective();  // C99 6.10p4.
+        }
+      } else if (Sub == "lif") {  // "elif".
+        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
+
+        // If this is a #elif with a #else before it, report the error.
+        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);
+
+        // If this is in a skipping block or if we've already handled this #if
+        // block, don't bother parsing the condition.
+        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+          DiscardUntilEndOfDirective();
+        } else {
+          // Restore the value of LexingRawMode so that identifiers are
+          // looked up, etc, inside the #elif expression.
+          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
+          CurPPLexer->LexingRawMode = false;
+          IdentifierInfo *IfNDefMacro = nullptr;
+          DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
+          const bool CondValue = DER.Conditional;
+          CurPPLexer->LexingRawMode = true;
+          if (Callbacks) {
+            Callbacks->Elif(
+                Tok.getLocation(), DER.ExprRange,
+                (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
+                CondInfo.IfLoc);
+          }
+          // If this condition is true, enter it! 
+          if (CondValue) {
+            CondInfo.FoundNonSkip = true;
+            break;
+          }
+        }
+      }
+    }
+
+    CurPPLexer->ParsingPreprocessorDirective = false;
+    // Restore comment saving mode.
+    if (CurLexer) CurLexer->resetExtendedTokenMode();
+  }
+
+  // Finally, if we are out of the conditional (saw an #endif or ran off the end
+  // of the file), just stop skipping and return to lexing whatever came after
+  // the #if block.
+  CurPPLexer->LexingRawMode = false;
+
+  // The last skipped range isn't actually skipped yet if it's truncated
+  // by the end of the preamble; we'll resume parsing after the preamble.
+  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
+    Callbacks->SourceRangeSkipped(
+        SourceRange(HashTokenLoc, CurPPLexer->getSourceLocation()),
+        Tok.getLocation());
+}
+
+Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
+  if (!SourceMgr.isInMainFile(Loc)) {
+    // Try to determine the module of the include directive.
+    // FIXME: Look into directly passing the FileEntry from LookupFile instead.
+    FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
+    if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
+      // The include comes from an included file.
+      return HeaderInfo.getModuleMap()
+          .findModuleForHeader(EntryOfIncl)
+          .getModule();
+    }
+  }
+
+  // This is either in the main file or not in a file at all. It belongs
+  // to the current module, if there is one.
+  return getLangOpts().CurrentModule.empty()
+             ? 
nullptr
             : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
} // (tail of a function whose beginning precedes this hunk)

const FileEntry *
Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                     Module *M,
                                                     SourceLocation Loc) {
  assert(M && "no module to include");

  // If the context is the global module fragment of some module, we never
  // want to return that file; instead, we want the innermost include-guarded
  // header that it included.
  bool InGlobalModuleFragment = M->Kind == Module::GlobalModuleFragment;

  // If we have a module import syntax, we shouldn't include a header to
  // make a particular module visible.
  if ((getLangOpts().ObjC || getLangOpts().CPlusPlusModules ||
       getLangOpts().ModulesTS) &&
      !InGlobalModuleFragment)
    return nullptr;

  Module *TopM = M->getTopLevelModule();
  Module *IncM = getModuleForLocation(IncLoc);

  // Walk up through the include stack, looking through textual headers of M
  // until we hit a non-textual header that we can #include. (We assume textual
  // headers of a module with non-textual headers aren't meant to be used to
  // import entities from the module.)
  auto &SM = getSourceManager();
  while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
    auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
    auto *FE = SM.getFileEntryForID(ID);
    if (!FE)
      break;

    if (InGlobalModuleFragment) {
      // In the global module fragment we only care about include-guarded
      // headers; keep walking outward through the include stack otherwise.
      if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
        return FE;
      Loc = SM.getIncludeLoc(ID);
      continue;
    }

    bool InTextualHeader = false;
    for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) {
      if (!Header.getModule()->isSubModuleOf(TopM))
        continue;

      if (!(Header.getRole() & ModuleMap::TextualHeader)) {
        // If this is an accessible, non-textual header of M's top-level module
        // that transitively includes the given location and makes the
        // corresponding module visible, this is the thing to #include.
        if (Header.isAccessibleFrom(IncM))
          return FE;

        // It's in a private header; we can't #include it.
        // FIXME: If there's a public header in some module that re-exports it,
        // then we could suggest including that, but it's not clear that's the
        // expected way to make this entity visible.
        continue;
      }

      InTextualHeader = true;
    }

    if (!InTextualHeader)
      break;

    Loc = SM.getIncludeLoc(ID);
  }

  return nullptr;
}

/// Search the header search paths (and, where applicable, the includer
/// stack and subframework headers) for \p Filename.
///
/// \returns the found file, or None if lookup failed everywhere. On success
/// the inclusion is also reported to the module map for header-inclusion
/// diagnostics (unless this is the assembler preprocessor).
Optional<FileEntryRef> Preprocessor::LookupFile(
    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
    const DirectoryLookup *FromDir, const FileEntry *FromFile,
    const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
    SmallVectorImpl<char> *RelativePath,
    ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
    bool *IsFrameworkFound, bool SkipCache) {
  Module *RequestingModule = getModuleForLocation(FilenameLoc);
  bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);

  // If the header lookup mechanism may be relative to the current inclusion
  // stack, record the parent #includes.
  SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
      Includers;
  bool BuildSystemModule = false;
  if (!FromDir && !FromFile) {
    FileID FID = getCurrentFileLexer()->getFileID();
    const FileEntry *FileEnt = SourceMgr.getFileEntryForID(FID);

    // If there is no file entry associated with this file, it must be the
    // predefines buffer or the module includes buffer. Any other file is not
    // lexed with a normal lexer, so it won't be scanned for preprocessor
    // directives.
    //
    // If we have the predefines buffer, resolve #include references (which come
    // from the -include command line argument) from the current working
    // directory instead of relative to the main file.
    //
    // If we have the module includes buffer, resolve #include references (which
    // come from header declarations in the module map) relative to the module
    // map file.
    if (!FileEnt) {
      if (FID == SourceMgr.getMainFileID() && MainFileDir) {
        Includers.push_back(std::make_pair(nullptr, MainFileDir));
        BuildSystemModule = getCurrentModule()->IsSystem;
      } else if ((FileEnt =
                    SourceMgr.getFileEntryForID(SourceMgr.getMainFileID())))
        Includers.push_back(std::make_pair(FileEnt, *FileMgr.getDirectory(".")));
    } else {
      Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
    }

    // MSVC searches the current include stack from top to bottom for
    // headers included by quoted include directives.
    // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
    if (LangOpts.MSVCCompat && !isAngled) {
      for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
        if (IsFileLexer(ISEntry))
          if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
            Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
      }
    }
  }

  CurDir = CurDirLookup;

  if (FromFile) {
    // We're supposed to start looking from after a particular file. Search
    // the include path until we find that file or run out of files.
    const DirectoryLookup *TmpCurDir = CurDir;
    const DirectoryLookup *TmpFromDir = nullptr;
    while (Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
               Filename, FilenameLoc, isAngled, TmpFromDir, TmpCurDir,
               Includers, SearchPath, RelativePath, RequestingModule,
               SuggestedModule, /*IsMapped=*/nullptr,
               /*IsFrameworkFound=*/nullptr, SkipCache)) {
      // Keep looking as if this file did a #include_next.
      TmpFromDir = TmpCurDir;
      ++TmpFromDir;
      if (&FE->getFileEntry() == FromFile) {
        // Found it.
        FromDir = TmpFromDir;
        CurDir = TmpCurDir;
        break;
      }
    }
  }

  // Do a standard file entry lookup.
  Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
      Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath,
      RelativePath, RequestingModule, SuggestedModule, IsMapped,
      IsFrameworkFound, SkipCache, BuildSystemModule);
  if (FE) {
    if (SuggestedModule && !LangOpts.AsmPreprocessor)
      HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
          RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
          Filename, &FE->getFileEntry());
    return FE;
  }

  const FileEntry *CurFileEnt;
  // Otherwise, see if this is a subframework header.  If so, this is relative
  // to one of the headers on the #include stack.  Walk the list of the current
  // headers on the #include stack and pass them to HeaderInfo.
  if (IsFileLexer()) {
    if ((CurFileEnt = CurPPLexer->getFileEntry())) {
      if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
              Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule,
              SuggestedModule)) {
        if (SuggestedModule && !LangOpts.AsmPreprocessor)
          HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
              RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
              Filename, &FE->getFileEntry());
        return FE;
      }
    }
  }

  for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
    if (IsFileLexer(ISEntry)) {
      if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
        if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
                Filename, CurFileEnt, SearchPath, RelativePath,
                RequestingModule, SuggestedModule)) {
          if (SuggestedModule && !LangOpts.AsmPreprocessor)
            HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
                RequestingModule, RequestingModuleIsModuleInterface,
                FilenameLoc, Filename, &FE->getFileEntry());
          return FE;
        }
      }
    }
  }

  // Otherwise, we really couldn't find the file.
  return None;
}

//===----------------------------------------------------------------------===//
// Preprocessor Directive Handling.
//===----------------------------------------------------------------------===//

/// RAII helper: the constructor saves DisableMacroExpansion and, if
/// MacroExpansionInDirectivesOverride is set, enables expansion for the
/// duration of directive handling; the destructor restores the saved value.
class Preprocessor::ResetMacroExpansionHelper {
public:
  ResetMacroExpansionHelper(Preprocessor *pp)
    : PP(pp), save(pp->DisableMacroExpansion) {
    if (pp->MacroExpansionInDirectivesOverride)
      pp->DisableMacroExpansion = false;
  }

  ~ResetMacroExpansionHelper() {
    PP->DisableMacroExpansion = save;
  }

private:
  Preprocessor *PP;
  bool save;
};

/// Process a directive while looking for the through header or a #pragma
/// hdrstop. The following directives are handled:
/// #include (to check if it is the through header)
/// #define (to warn about macros that don't match the PCH)
/// #pragma (to check for pragma hdrstop).
/// All other directives are completely discarded.
void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
                                                       SourceLocation HashLoc) {
  if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
    if (II->getPPKeywordID() == tok::pp_define) {
      return HandleDefineDirective(Result,
                                   /*ImmediatelyAfterHeaderGuard=*/false);
    }
    if (SkippingUntilPCHThroughHeader &&
        II->getPPKeywordID() == tok::pp_include) {
      return HandleIncludeDirective(HashLoc, Result);
    }
    if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
      Lex(Result);
      auto *II = Result.getIdentifierInfo();
      if (II && II->getName() == "hdrstop")
        return HandlePragmaHdrstop(Result);
    }
  }
  // Anything else is dropped without processing while skipping to the
  // through header / hdrstop.
  DiscardUntilEndOfDirective();
}

/// HandleDirective - This callback is invoked when the lexer sees a # token
/// at the start of a line.
/// This consumes the directive, modifies the
/// lexer/preprocessor state, and advances the lexer(s) so that the next token
/// read is the correct one.
void Preprocessor::HandleDirective(Token &Result) {
  // FIXME: Traditional: # with whitespace before it not recognized by K&R?

  // We just parsed a # character at the start of a line, so we're in directive
  // mode.  Tell the lexer this so any newlines we see will be converted into an
  // EOD token (which terminates the directive).
  CurPPLexer->ParsingPreprocessorDirective = true;
  if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);

  bool ImmediatelyAfterTopLevelIfndef =
      CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
  CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();

  ++NumDirectives;

  // We are about to read a token.  For the multiple-include optimization FA to
  // work, we have to remember if we had read any tokens *before* this
  // pp-directive.
  bool ReadAnyTokensBeforeDirective = CurPPLexer->MIOpt.getHasReadAnyTokensVal();

  // Save the '#' token in case we need to return it later (e.g. for the
  // assembler-preprocessor fallback below).
  Token SavedHash = Result;

  // Read the next token, the directive flavor.  This isn't expanded due to
  // C99 6.10.3p8.
  LexUnexpandedToken(Result);

  // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
  //   #define A(x) #x
  //   A(abc
  //     #warning blah
  //   def)
  // If so, the user is relying on undefined behavior, emit a diagnostic. Do
  // not support this for #include-like directives, since that can result in
  // terrible diagnostics, and does not work in GCC.
  if (InMacroArgs) {
    if (IdentifierInfo *II = Result.getIdentifierInfo()) {
      switch (II->getPPKeywordID()) {
      case tok::pp_include:
      case tok::pp_import:
      case tok::pp_include_next:
      case tok::pp___include_macros:
      case tok::pp_pragma:
        // Hard error for #include-like directives inside macro arguments.
        Diag(Result, diag::err_embedded_directive) << II->getName();
        Diag(*ArgMacro, diag::note_macro_expansion_here)
            << ArgMacro->getIdentifierInfo();
        DiscardUntilEndOfDirective();
        return;
      default:
        break;
      }
    }
    // Other directives in macro arguments only get an extension warning.
    Diag(Result, diag::ext_embedded_directive);
  }

  // Temporarily enable macro expansion if set so
  // and reset to previous state when returning from this function.
  ResetMacroExpansionHelper helper(this);

  if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
    return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());

  switch (Result.getKind()) {
  case tok::eod:
    return;   // null directive.
  case tok::code_completion:
    if (CodeComplete)
      CodeComplete->CodeCompleteDirective(
                                    CurPPLexer->getConditionalStackDepth() > 0);
    setCodeCompletionReached();
    return;
  case tok::numeric_constant:  // # 7  GNU line marker directive.
    if (getLangOpts().AsmPreprocessor)
      break;  // # 4 is not a preprocessor directive in .S files.
    return HandleDigitDirective(Result);
  default:
    IdentifierInfo *II = Result.getIdentifierInfo();
    if (!II) break; // Not an identifier.

    // Ask what the preprocessor keyword ID is.
    switch (II->getPPKeywordID()) {
    default: break;
    // C99 6.10.1 - Conditional Inclusion.
    case tok::pp_if:
      return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
    case tok::pp_ifdef:
      return HandleIfdefDirective(Result, SavedHash, false,
                                  true /*not valid for miopt*/);
    case tok::pp_ifndef:
      return HandleIfdefDirective(Result, SavedHash, true,
                                  ReadAnyTokensBeforeDirective);
    case tok::pp_elif:
      return HandleElifDirective(Result, SavedHash);
    case tok::pp_else:
      return HandleElseDirective(Result, SavedHash);
    case tok::pp_endif:
      return HandleEndifDirective(Result);

    // C99 6.10.2 - Source File Inclusion.
    case tok::pp_include:
      // Handle #include.
      return HandleIncludeDirective(SavedHash.getLocation(), Result);
    case tok::pp___include_macros:
      // Handle -imacros.
      return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);

    // C99 6.10.3 - Macro Replacement.
    case tok::pp_define:
      return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
    case tok::pp_undef:
      return HandleUndefDirective();

    // C99 6.10.4 - Line Control.
    case tok::pp_line:
      return HandleLineDirective();

    // C99 6.10.5 - Error Directive.
    case tok::pp_error:
      return HandleUserDiagnosticDirective(Result, false);

    // C99 6.10.6 - Pragma Directive.
    case tok::pp_pragma:
      return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});

    // GNU Extensions.
    case tok::pp_import:
      return HandleImportDirective(SavedHash.getLocation(), Result);
    case tok::pp_include_next:
      return HandleIncludeNextDirective(SavedHash.getLocation(), Result);

    case tok::pp_warning:
      Diag(Result, diag::ext_pp_warning_directive);
      return HandleUserDiagnosticDirective(Result, true);
    case tok::pp_ident:
      return HandleIdentSCCSDirective(Result);
    case tok::pp_sccs:
      return HandleIdentSCCSDirective(Result);
    case tok::pp_assert:
      //isExtension = true;  // FIXME: implement #assert
      break;
    case tok::pp_unassert:
      //isExtension = true;  // FIXME: implement #unassert
      break;

    case tok::pp___public_macro:
      if (getLangOpts().Modules)
        return HandleMacroPublicDirective(Result);
      break;

    case tok::pp___private_macro:
      if (getLangOpts().Modules)
        return HandleMacroPrivateDirective();
      break;
    }
    break;
  }

  // If this is a .S file, treat unknown # directives as non-preprocessor
  // directives.  This is important because # may be a comment or introduce
  // various pseudo-ops.  Just return the # token and push back the following
  // token to be lexed next time.
  if (getLangOpts().AsmPreprocessor) {
    auto Toks = std::make_unique<Token[]>(2);
    // Return the # and the token after it.
    Toks[0] = SavedHash;
    Toks[1] = Result;

    // If the second token is a hashhash token, then we need to translate it to
    // unknown so the token lexer doesn't try to perform token pasting.
    if (Result.is(tok::hashhash))
      Toks[1].setKind(tok::unknown);

    // Enter this token stream so that we re-lex the tokens.  Make sure to
    // enable macro expansion, in case the token after the # is an identifier
    // that is expanded.
    EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
    return;
  }

  // If we reached here, the preprocessing token is not valid!
+  Diag(Result, diag::err_pp_invalid_directive); + +  // Read the rest of the PP line. +  DiscardUntilEndOfDirective(); + +  // Okay, we're done parsing the directive. +} + +/// GetLineValue - Convert a numeric token into an unsigned value, emitting +/// Diagnostic DiagID if it is invalid, and returning the value in Val. +static bool GetLineValue(Token &DigitTok, unsigned &Val, +                         unsigned DiagID, Preprocessor &PP, +                         bool IsGNULineDirective=false) { +  if (DigitTok.isNot(tok::numeric_constant)) { +    PP.Diag(DigitTok, DiagID); + +    if (DigitTok.isNot(tok::eod)) +      PP.DiscardUntilEndOfDirective(); +    return true; +  } + +  SmallString<64> IntegerBuffer; +  IntegerBuffer.resize(DigitTok.getLength()); +  const char *DigitTokBegin = &IntegerBuffer[0]; +  bool Invalid = false; +  unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid); +  if (Invalid) +    return true; + +  // Verify that we have a simple digit-sequence, and compute the value.  This +  // is always a simple digit string computed in decimal, so we do this manually +  // here. +  Val = 0; +  for (unsigned i = 0; i != ActualLength; ++i) { +    // C++1y [lex.fcon]p1: +    //   Optional separating single quotes in a digit-sequence are ignored +    if (DigitTokBegin[i] == '\'') +      continue; + +    if (!isDigit(DigitTokBegin[i])) { +      PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i), +              diag::err_pp_line_digit_sequence) << IsGNULineDirective; +      PP.DiscardUntilEndOfDirective(); +      return true; +    } + +    unsigned NextVal = Val*10+(DigitTokBegin[i]-'0'); +    if (NextVal < Val) { // overflow. 
+      PP.Diag(DigitTok, DiagID); +      PP.DiscardUntilEndOfDirective(); +      return true; +    } +    Val = NextVal; +  } + +  if (DigitTokBegin[0] == '0' && Val) +    PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal) +      << IsGNULineDirective; + +  return false; +} + +/// Handle a \#line directive: C99 6.10.4. +/// +/// The two acceptable forms are: +/// \verbatim +///   # line digit-sequence +///   # line digit-sequence "s-char-sequence" +/// \endverbatim +void Preprocessor::HandleLineDirective() { +  // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are +  // expanded. +  Token DigitTok; +  Lex(DigitTok); + +  // Validate the number and convert it to an unsigned. +  unsigned LineNo; +  if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this)) +    return; + +  if (LineNo == 0) +    Diag(DigitTok, diag::ext_pp_line_zero); + +  // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a +  // number greater than 2147483647".  C90 requires that the line # be <= 32767. +  unsigned LineLimit = 32768U; +  if (LangOpts.C99 || LangOpts.CPlusPlus11) +    LineLimit = 2147483648U; +  if (LineNo >= LineLimit) +    Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; +  else if (LangOpts.CPlusPlus11 && LineNo >= 32768U) +    Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); + +  int FilenameID = -1; +  Token StrTok; +  Lex(StrTok); + +  // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a +  // string followed by eod. +  if (StrTok.is(tok::eod)) +    ; // ok +  else if (StrTok.isNot(tok::string_literal)) { +    Diag(StrTok, diag::err_pp_line_invalid_filename); +    DiscardUntilEndOfDirective(); +    return; +  } else if (StrTok.hasUDSuffix()) { +    Diag(StrTok, diag::err_invalid_string_udl); +    DiscardUntilEndOfDirective(); +    return; +  } else { +    // Parse and validate the string, converting it into a unique ID. 
    StringLiteralParser Literal(StrTok, *this);
    assert(Literal.isAscii() && "Didn't allow wide strings in");
    if (Literal.hadError) {
      DiscardUntilEndOfDirective();
      return;
    }
    if (Literal.Pascal) {
      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
      DiscardUntilEndOfDirective();
      return;
    }
    FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());

    // Verify that there is nothing after the string, other than EOD.  Because
    // of C99 6.10.4p5, macros that expand to empty tokens are ok.
    CheckEndOfDirective("line", true);
  }

  // Take the file kind of the file containing the #line directive. #line
  // directives are often used for generated sources from the same codebase, so
  // the new file should generally be classified the same way as the current
  // file. This is visible in GCC's pre-processed output, which rewrites #line
  // to GNU line markers.
  SrcMgr::CharacteristicKind FileKind =
      SourceMgr.getFileCharacteristic(DigitTok.getLocation());

  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
                        false, FileKind);

  if (Callbacks)
    Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
                           PPCallbacks::RenameFile, FileKind);
}

/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
/// marker directive.
///
/// Recognizes the optional flag sequence ('1' | '2')? '3'? '4'?; flag 1/2 set
/// \p IsFileEntry / \p IsFileExit, flag 3 marks a system header and flag 4
/// (only after 3) an extern-"C" system header via \p FileKind.
/// \returns true on error (after diagnosing and discarding the directive),
///          false when the flags (possibly none) were consumed up to EOD.
static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
                                SrcMgr::CharacteristicKind &FileKind,
                                Preprocessor &PP) {
  unsigned FlagVal;
  Token FlagTok;
  PP.Lex(FlagTok);
  if (FlagTok.is(tok::eod)) return false;
  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
    return true;

  if (FlagVal == 1) {
    IsFileEntry = true;

    PP.Lex(FlagTok);
    if (FlagTok.is(tok::eod)) return false;
    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
      return true;
  } else if (FlagVal == 2) {
    IsFileExit = true;

    SourceManager &SM = PP.getSourceManager();
    // If we are leaving the current presumed file, check to make sure the
    // presumed include stack isn't empty!
    FileID CurFileID =
      SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
    PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
    if (PLoc.isInvalid())
      return true;

    // If there is no include loc (main file) or if the include loc is in a
    // different physical file, then we aren't in a "1" line marker flag region.
    SourceLocation IncLoc = PLoc.getIncludeLoc();
    if (IncLoc.isInvalid() ||
        SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
      PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
      PP.DiscardUntilEndOfDirective();
      return true;
    }

    PP.Lex(FlagTok);
    if (FlagTok.is(tok::eod)) return false;
    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
      return true;
  }

  // We must have 3 if there are still flags.
  if (FlagVal != 3) {
    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
    PP.DiscardUntilEndOfDirective();
    return true;
  }

  FileKind = SrcMgr::C_System;

  PP.Lex(FlagTok);
  if (FlagTok.is(tok::eod)) return false;
  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
    return true;

  // We must have 4 if there is yet another flag.
  if (FlagVal != 4) {
    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
    PP.DiscardUntilEndOfDirective();
    return true;
  }

  FileKind = SrcMgr::C_ExternCSystem;

  PP.Lex(FlagTok);
  if (FlagTok.is(tok::eod)) return false;

  // There are no more valid flags here.
  PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
  PP.DiscardUntilEndOfDirective();
  return true;
}

/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
/// one of the following forms:
///
///     # 42
///     # 42 "file" ('1' | '2')?
///     # 42 "file" ('1' | '2')? '3' '4'?
///
void Preprocessor::HandleDigitDirective(Token &DigitTok) {
  // Validate the number and convert it to an unsigned.  GNU does not have a
  // line # limit other than it fit in 32-bits.
  unsigned LineNo;
  if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
                   *this, true))
    return;

  Token StrTok;
  Lex(StrTok);

  bool IsFileEntry = false, IsFileExit = false;
  int FilenameID = -1;
  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;

  // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
  // string followed by eod.
  if (StrTok.is(tok::eod)) {
    // Treat this like "#line NN", which doesn't change file characteristics.
    FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
  } else if (StrTok.isNot(tok::string_literal)) {
    Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
    DiscardUntilEndOfDirective();
    return;
  } else if (StrTok.hasUDSuffix()) {
    Diag(StrTok, diag::err_invalid_string_udl);
    DiscardUntilEndOfDirective();
    return;
  } else {
    // Parse and validate the string, converting it into a unique ID.
    StringLiteralParser Literal(StrTok, *this);
    assert(Literal.isAscii() && "Didn't allow wide strings in");
    if (Literal.hadError) {
      DiscardUntilEndOfDirective();
      return;
    }
    if (Literal.Pascal) {
      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
      DiscardUntilEndOfDirective();
      return;
    }
    FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());

    // If a filename was present, read any flags that are present.
    if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
      return;
  }

  // Create a line note with this information.
  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
                        IsFileExit, FileKind);

  // If the preprocessor has callbacks installed, notify them of the #line
  // change.  This is used so that the line marker comes out in -E mode for
  // example.
  if (Callbacks) {
    PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
    if (IsFileEntry)
      Reason = PPCallbacks::EnterFile;
    else if (IsFileExit)
      Reason = PPCallbacks::ExitFile;

    Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
  }
}

/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
///
void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
                                                 bool isWarning) {
  // Read the rest of the line raw.
  // We do this because we don't want macros
  // to be expanded and we don't require that the tokens be valid preprocessing
  // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
  // collapse multiple consecutive white space between tokens, but this isn't
  // specified by the standard.
  SmallString<128> Message;
  CurLexer->ReadToEndOfLine(&Message);

  // Find the first non-whitespace character, so that we can make the
  // diagnostic more succinct.
  StringRef Msg = StringRef(Message).ltrim(' ');

  if (isWarning)
    Diag(Tok, diag::pp_hash_warning) << Msg;
  else
    Diag(Tok, diag::err_pp_hash_error) << Msg;
}

/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
///
void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
  // Yes, this directive is an extension.
  Diag(Tok, diag::ext_pp_ident_directive);

  // Read the string argument.
  Token StrTok;
  Lex(StrTok);

  // If the token kind isn't a string, it's a malformed directive.
  if (StrTok.isNot(tok::string_literal) &&
      StrTok.isNot(tok::wide_string_literal)) {
    Diag(StrTok, diag::err_pp_malformed_ident);
    if (StrTok.isNot(tok::eod))
      DiscardUntilEndOfDirective();
    return;
  }

  if (StrTok.hasUDSuffix()) {
    Diag(StrTok, diag::err_invalid_string_udl);
    DiscardUntilEndOfDirective();
    return;
  }

  // Verify that there is nothing after the string, other than EOD.
  CheckEndOfDirective("ident");

  if (Callbacks) {
    bool Invalid = false;
    std::string Str = getSpelling(StrTok, &Invalid);
    if (!Invalid)
      Callbacks->Ident(Tok.getLocation(), Str);
  }
}

/// Handle a #public directive.
void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
  Token MacroNameTok;
  ReadMacroName(MacroNameTok, MU_Undef);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // Check to see if this is the last token on the #__public_macro line.
  CheckEndOfDirective("__public_macro");

  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
  // Okay, we finally have a valid identifier to undef.
  MacroDirective *MD = getLocalMacroDirective(II);

  // If the macro is not defined, this is an error.
  if (!MD) {
    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
    return;
  }

  // Note that this macro has now been exported.
  appendMacroDirective(II, AllocateVisibilityMacroDirective(
                                MacroNameTok.getLocation(), /*isPublic=*/true));
}

/// Handle a #private directive.  Mirror image of HandleMacroPublicDirective:
/// records a visibility directive with isPublic=false.
void Preprocessor::HandleMacroPrivateDirective() {
  Token MacroNameTok;
  ReadMacroName(MacroNameTok, MU_Undef);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // Check to see if this is the last token on the #__private_macro line.
  CheckEndOfDirective("__private_macro");

  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
  // Okay, we finally have a valid identifier to undef.
  MacroDirective *MD = getLocalMacroDirective(II);

  // If the macro is not defined, this is an error.
  if (!MD) {
    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
    return;
  }

  // Note that this macro has now been marked private.
  appendMacroDirective(II, AllocateVisibilityMacroDirective(
                               MacroNameTok.getLocation(), /*isPublic=*/false));
}

//===----------------------------------------------------------------------===//
// Preprocessor Include Directive Handling.
//===----------------------------------------------------------------------===//

/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
/// checked and spelled filename, e.g. as an operand of \#include. This returns
/// true if the input filename was in <>'s or false if it were in ""'s.  The
/// caller is expected to provide a buffer that is large enough to hold the
/// spelling of the filename, but is also expected to handle the case when
/// this method decides to use a different buffer.
///
/// On any malformed filename, a diagnostic is emitted and \p Buffer is reset
/// to the empty StringRef (and true is returned, so callers must check for an
/// empty buffer rather than relying on the angled/quoted result).
bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
                                              StringRef &Buffer) {
  // Get the text form of the filename.
  assert(!Buffer.empty() && "Can't have tokens with empty spellings!");

  // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
  // C++20 [lex.header]/2:
  //
  // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
  //   in C: behavior is undefined
  //   in C++: program is conditionally-supported with implementation-defined
  //           semantics

  // Make sure the filename is <x> or "x".
  bool isAngled;
  if (Buffer[0] == '<') {
    if (Buffer.back() != '>') {
      Diag(Loc, diag::err_pp_expects_filename);
      Buffer = StringRef();
      return true;
    }
    isAngled = true;
  } else if (Buffer[0] == '"') {
    if (Buffer.back() != '"') {
      Diag(Loc, diag::err_pp_expects_filename);
      Buffer = StringRef();
      return true;
    }
    isAngled = false;
  } else {
    Diag(Loc, diag::err_pp_expects_filename);
    Buffer = StringRef();
    return true;
  }

  // Diagnose #include "" as invalid.
  if (Buffer.size() <= 2) {
    Diag(Loc, diag::err_pp_empty_filename);
    Buffer = StringRef();
    return true;
  }

  // Skip the brackets.
  Buffer = Buffer.substr(1, Buffer.size()-2);
  return isAngled;
}

/// Push a token onto the token stream containing an annotation.
void Preprocessor::EnterAnnotationToken(SourceRange Range,
                                        tok::TokenKind Kind,
                                        void *AnnotationVal) {
  // FIXME: Produce this as the current token directly, rather than
  // allocating a new token for it.
  auto Tok = std::make_unique<Token[]>(1);
  Tok[0].startToken();
  Tok[0].setKind(Kind);
  Tok[0].setLocation(Range.getBegin());
  Tok[0].setAnnotationEndLoc(Range.getEnd());
  Tok[0].setAnnotationValue(AnnotationVal);
  // Push the single-token stream; not reinjected since this token was never
  // produced by the lexer in the first place.
  EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
}

/// Produce a diagnostic informing the user that a #include or similar
/// was implicitly treated as a module import.
static void diagnoseAutoModuleImport(
    Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
    ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
    SourceLocation PathEnd) {
  // Pick the import syntax matching the active language mode; bail out
  // silently if no import syntax is available to suggest.
  StringRef ImportKeyword;
  if (PP.getLangOpts().ObjC)
    ImportKeyword = "@import";
  else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules)
    ImportKeyword = "import";
  else
    return; // no import syntax available

  // Build the dotted module path, e.g. "std.vector".
  SmallString<128> PathString;
  for (size_t I = 0, N = Path.size(); I != N; ++I) {
    if (I)
      PathString += '.';
    PathString += Path[I].first->getName();
  }
  int IncludeKind = 0;

  // Map the directive spelling to the %select index of the diagnostic.
  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_include:
    IncludeKind = 0;
    break;

  case tok::pp_import:
    IncludeKind = 1;
    break;

  case tok::pp_include_next:
    IncludeKind = 2;
    break;

  case tok::pp___include_macros:
    IncludeKind = 3;
    break;

  default:
    llvm_unreachable("unknown include directive kind");
  }

  // Replace the whole directive (from '#' through the end of the header
  // name) with the equivalent module-import statement.
  CharSourceRange ReplaceRange(SourceRange(HashLoc, PathEnd),
                               /*IsTokenRange=*/false);
  PP.Diag(HashLoc, diag::warn_auto_module_import)
      << IncludeKind << PathString
      << FixItHint::CreateReplacement(
             ReplaceRange, (ImportKeyword + " " + PathString + ";").str());
}

// Given a vector of path components and a string containing the real
// path to the file, build a properly-cased replacement in the vector,
// and return true if the replacement should be suggested.
static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
                            StringRef RealPathName) {
  // Walk both paths from the rightmost (deepest) component.
  auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
  auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
  // Cnt counts pending ".." components that cancel out a following
  // (right-to-left) real component.
  int Cnt = 0;
  bool SuggestReplacement = false;
  // Below is a best-effort to handle ".." in paths. It is admittedly
  // not 100% correct in the presence of symlinks.
  for (auto &Component : llvm::reverse(Components)) {
    if ("." == Component) {
      // "." contributes nothing; skip it.
    } else if (".." == Component) {
      ++Cnt;
    } else if (Cnt) {
      // This component is cancelled by a previously seen "..".
      --Cnt;
    } else if (RealPathComponentIter != RealPathComponentEnd) {
      if (Component != *RealPathComponentIter) {
        // If these path components differ by more than just case, then we
        // may be looking at symlinked paths. Bail on this diagnostic to avoid
        // noisy false positives.
        SuggestReplacement = RealPathComponentIter->equals_lower(Component);
        if (!SuggestReplacement)
          break;
        // Case-only difference: rewrite the component with the on-disk case.
        Component = *RealPathComponentIter;
      }
      ++RealPathComponentIter;
    }
  }
  return SuggestReplacement;
}

/// Check whether module \p M is usable in the current language mode/target;
/// if not, emit the appropriate diagnostic and return true.
bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
                                          const TargetInfo &TargetInfo,
                                          DiagnosticsEngine &Diags, Module *M) {
  Module::Requirement Requirement;
  Module::UnresolvedHeaderDirective MissingHeader;
  Module *ShadowingModule = nullptr;
  if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
                     ShadowingModule))
    return false;

  // Diagnose the most specific cause of unavailability first.
  if (MissingHeader.FileNameLoc.isValid()) {
    Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
        << MissingHeader.IsUmbrella << MissingHeader.FileName;
  } else if (ShadowingModule) {
    Diags.Report(M->DefinitionLoc, diag::err_module_shadowed) << M->Name;
    Diags.Report(ShadowingModule->DefinitionLoc,
                 diag::note_previous_definition);
  } else {
    // FIXME: Track the location at which the requirement was specified, and
    // use it here.
    Diags.Report(M->DefinitionLoc, diag::err_module_unavailable)
        << M->getFullModuleName() << Requirement.second << Requirement.first;
  }
  return true;
}

/// HandleIncludeDirective - The "\#include" tokens have just been read, read
/// the file to be included from the lexer, then include it!  This is a common
/// routine with functionality shared between \#include, \#include_next and
/// \#import.  LookupFrom is set when this is a \#include_next directive, it
/// specifies the file to start searching from.
void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
                                          Token &IncludeTok,
                                          const DirectoryLookup *LookupFrom,
                                          const FileEntry *LookupFromFile) {
  Token FilenameTok;
  if (LexHeaderName(FilenameTok))
    return;

  // Anything other than a header-name token here is malformed; diagnose and
  // skip to the end of the directive.
  if (FilenameTok.isNot(tok::header_name)) {
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
    if (FilenameTok.isNot(tok::eod))
      DiscardUntilEndOfDirective();
    return;
  }

  // Verify that there is nothing after the filename, other than EOD.  Note
  // that we allow macros that expand to nothing after the filename, because
  // this falls into the category of "#include pp-tokens new-line" specified
  // in C99 6.10.2p4.
  SourceLocation EndLoc =
      CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);

  // Delegate the real work, then materialize any module annotation tokens
  // the inclusion requires.
  auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
                                            EndLoc, LookupFrom, LookupFromFile);
  switch (Action.Kind) {
  case ImportAction::None:
  case ImportAction::SkippedModuleImport:
    break;
  case ImportAction::ModuleBegin:
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                         tok::annot_module_begin, Action.ModuleForHeader);
    break;
  case ImportAction::ModuleImport:
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                         tok::annot_module_include, Action.ModuleForHeader);
    break;
  }
}

/// Look up the file named by an include directive, trying (in order) the
/// normal search path, client-provided recovery paths, a quoted-form lookup
/// for angled includes, and typo correction.  Diagnoses and returns None if
/// the file cannot be found.
Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(
    const DirectoryLookup *&CurDir, StringRef Filename,
    SourceLocation FilenameLoc, CharSourceRange FilenameRange,
    const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
    bool &IsMapped, const DirectoryLookup *LookupFrom,
    const FileEntry *LookupFromFile, StringRef LookupFilename,
    SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
    ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
  Optional<FileEntryRef> File = LookupFile(
      FilenameLoc, LookupFilename,
      isAngled, LookupFrom, LookupFromFile, CurDir,
      Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
      &SuggestedModule, &IsMapped, &IsFrameworkFound);
  if (File)
    return File;

  if (Callbacks) {
    // Give the clients a chance to recover.
    SmallString<128> RecoveryPath;
    if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
      if (auto DE = FileMgr.getOptionalDirectoryRef(RecoveryPath)) {
        // Add the recovery path to the list of search paths.
        DirectoryLookup DL(*DE, SrcMgr::C_User, false);
        HeaderInfo.AddSearchPath(DL, isAngled);

        // Try the lookup again, skipping the cache.
        Optional<FileEntryRef> File = LookupFile(
            FilenameLoc,
            LookupFilename, isAngled,
            LookupFrom, LookupFromFile, CurDir, nullptr, nullptr,
            &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr,
            /*SkipCache*/ true);
        if (File)
          return File;
      }
    }
  }

  if (SuppressIncludeNotFoundError)
    return None;

  // If the file could not be located and it was included via angle
  // brackets, we can attempt a lookup as though it were a quoted path to
  // provide the user with a possible fixit.
  if (isAngled) {
    Optional<FileEntryRef> File = LookupFile(
        FilenameLoc, LookupFilename,
        false, LookupFrom, LookupFromFile, CurDir,
        Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
        &SuggestedModule, &IsMapped,
        /*IsFrameworkFound=*/nullptr);
    if (File) {
      Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
          << Filename << IsImportDecl
          << FixItHint::CreateReplacement(FilenameRange,
                                          "\"" + Filename.str() + "\"");
      return File;
    }
  }

  // Check for likely typos due to leading or trailing non-isAlphanumeric
  // characters
  StringRef OriginalFilename = Filename;
  if (LangOpts.SpellChecking) {
    // A heuristic to correct a typo file name by removing leading and
    // trailing non-isAlphanumeric characters.
    auto CorrectTypoFilename = [](llvm::StringRef Filename) {
      Filename = Filename.drop_until(isAlphanumeric);
      while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
        Filename = Filename.drop_back();
      }
      return Filename;
    };
    StringRef TypoCorrectionName = CorrectTypoFilename(Filename);

#ifndef _WIN32
    // Normalize slashes when compiling with -fms-extensions on non-Windows.
    // This is unnecessary on Windows since the filesystem there handles
    // backslashes.
    SmallString<128> NormalizedTypoCorrectionPath;
    if (LangOpts.MicrosoftExt) {
      NormalizedTypoCorrectionPath = TypoCorrectionName;
      llvm::sys::path::native(NormalizedTypoCorrectionPath);
      TypoCorrectionName = NormalizedTypoCorrectionPath;
    }
#endif

    Optional<FileEntryRef> File = LookupFile(
        FilenameLoc, TypoCorrectionName, isAngled, LookupFrom, LookupFromFile,
        CurDir, Callbacks ? &SearchPath : nullptr,
        Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
        /*IsFrameworkFound=*/nullptr);
    if (File) {
      // Suggest the corrected spelling, preserving the delimiter style the
      // user wrote.
      auto Hint =
          isAngled ? FixItHint::CreateReplacement(
                         FilenameRange, "<" + TypoCorrectionName.str() + ">")
                   : FixItHint::CreateReplacement(
                         FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
      Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
          << OriginalFilename << TypoCorrectionName << Hint;
      // We found the file, so set the Filename to the name after typo
      // correction.
      Filename = TypoCorrectionName;
      return File;
    }
  }

  // If the file is still not found, just go with the vanilla diagnostic
  assert(!File.hasValue() && "expected missing file");
  Diag(FilenameTok, diag::err_pp_file_not_found)
      << OriginalFilename << FilenameRange;
  if (IsFrameworkFound) {
    // The framework directory exists but the header inside it doesn't;
    // point the user at the framework to clarify.
    size_t SlashPos = OriginalFilename.find('/');
    assert(SlashPos != StringRef::npos &&
           "Include with framework name should have '/' in the filename");
    StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
    FrameworkCacheEntry &CacheEntry =
        HeaderInfo.LookupFrameworkCache(FrameworkName);
    assert(CacheEntry.Directory && "Found framework should be in cache");
    Diag(FilenameTok, diag::note_pp_framework_without_header)
        << OriginalFilename.substr(SlashPos + 1) << FrameworkName
        << CacheEntry.Directory->getName();
  }

  return None;
}

/// Handle either a #include-like directive or an import declaration that names
/// a header file.
///
/// \param HashLoc The location of the '#' token for an include, or
///        SourceLocation() for an import declaration.
/// \param IncludeTok The include / include_next / import token.
/// \param FilenameTok The header-name token.
/// \param EndLoc The location at which any imported macros become visible.
/// \param LookupFrom For #include_next, the starting directory for the
///        directory lookup.
/// \param LookupFromFile For #include_next, the starting file for the directory
///        lookup.
Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
    SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
    SourceLocation EndLoc, const DirectoryLookup *LookupFrom,
    const FileEntry *LookupFromFile) {
  SmallString<128> FilenameBuffer;
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
  SourceLocation CharEnd = FilenameTok.getEndLoc();

  CharSourceRange FilenameRange
    = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
  StringRef OriginalFilename = Filename;
  // Strip the <> or "" delimiters; isAngled records which form was used.
  bool isAngled =
    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);

  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
  // error.
  if (Filename.empty())
    return {ImportAction::None};

  // An import declaration has no '#', so an invalid HashLoc identifies it.
  bool IsImportDecl = HashLoc.isInvalid();
  SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;

  // Complain about attempts to #include files in an audit pragma.
  if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
    Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
  }

  // Complain about attempts to #include files in an assume-nonnull pragma.
  if (PragmaAssumeNonNullLoc.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
    Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaAssumeNonNullLoc = SourceLocation();
  }

  if (HeaderInfo.HasIncludeAliasMap()) {
    // Map the filename with the brackets still attached.  If the name doesn't
    // map to anything, fall back on the filename we've already gotten the
    // spelling for.
    StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
    if (!NewName.empty())
      Filename = NewName;
  }

  // Search include directories.
  bool IsMapped = false;
  bool IsFrameworkFound = false;
  const DirectoryLookup *CurDir;
  SmallString<1024> SearchPath;
  SmallString<1024> RelativePath;
  // We get the raw path only if we have 'Callbacks' to which we later pass
  // the path.
  ModuleMap::KnownHeader SuggestedModule;
  SourceLocation FilenameLoc = FilenameTok.getLocation();
  StringRef LookupFilename = Filename;

#ifndef _WIN32
  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
  // is unnecessary on Windows since the filesystem there handles backslashes.
  SmallString<128> NormalizedPath;
  if (LangOpts.MicrosoftExt) {
    NormalizedPath = Filename.str();
    llvm::sys::path::native(NormalizedPath);
    LookupFilename = NormalizedPath;
  }
#endif

  Optional<FileEntryRef> File = LookupHeaderIncludeOrImport(
      CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
      IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);

  if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
    if (File && isPCHThroughHeader(&File->getFileEntry()))
      SkippingUntilPCHThroughHeader = false;
    return {ImportAction::None};
  }

  // Check for circular inclusion of the main file.
  // We can't generate a consistent preamble with regard to the conditional
  // stack if the main file is included again as due to the preamble bounds
  // some directives (e.g. #endif of a header guard) will never be seen.
  // Since this will lead to confusing errors, avoid the inclusion.
  if (File && PreambleConditionalStack.isRecording() &&
      SourceMgr.translateFile(&File->getFileEntry()) ==
          SourceMgr.getMainFileID()) {
    Diag(FilenameTok.getLocation(),
         diag::err_pp_including_mainfile_in_preamble);
    return {ImportAction::None};
  }

  // Should we enter the source file? Set to Skip if either the source file is
  // known to have no effect beyond its effect on module visibility -- that is,
  // if it's got an include guard that is already defined, set to Import if it
  // is a modular header we've already built and should import.
  enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;

  if (PPOpts->SingleFileParseMode)
    Action = IncludeLimitReached;

  // If we've reached the max allowed include depth, it is usually due to an
  // include cycle. Don't enter already processed files again as it can lead to
  // reaching the max allowed include depth again.
  if (Action == Enter && HasReachedMaxIncludeDepth && File &&
      HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
    Action = IncludeLimitReached;

  // Determine whether we should try to import the module for this #include, if
  // there is one. Don't do so if precompiled module support is disabled or we
  // are processing this module textually (because we're building the module).
  if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
      !isForModuleBuilding(SuggestedModule.getModule(),
                           getLangOpts().CurrentModule,
                           getLangOpts().ModuleName)) {
    // If this include corresponds to a module but that module is
    // unavailable, diagnose the situation and bail out.
    // FIXME: Remove this; loadModule does the same check (but produces
    // slightly worse diagnostics).
    if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), getDiagnostics(),
                               SuggestedModule.getModule())) {
      Diag(FilenameTok.getLocation(),
           diag::note_implicit_top_level_module_import_here)
          << SuggestedModule.getModule()->getTopLevelModuleName();
      return {ImportAction::None};
    }

    // Compute the module access path corresponding to this module.
    // FIXME: Should we have a second loadModule() overload to avoid this
    // extra lookup step?
    SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
    for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
      Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                    FilenameTok.getLocation()));
    // Parent chain was walked leaf-to-root; the loader wants root-to-leaf.
    std::reverse(Path.begin(), Path.end());

    // Warn that we're replacing the include/import with a module import.
    if (!IsImportDecl)
      diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);

    // Load the module to import its macros. We'll make the declarations
    // visible when the parser gets here.
    // FIXME: Pass SuggestedModule in here rather than converting it to a path
    // and making the module loader convert it back again.
    ModuleLoadResult Imported = TheModuleLoader.loadModule(
        IncludeTok.getLocation(), Path, Module::Hidden,
        /*IsInclusionDirective=*/true);
    assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
           "the imported module is different than the suggested one");

    if (Imported) {
      Action = Import;
    } else if (Imported.isMissingExpected()) {
      // We failed to find a submodule that we assumed would exist (because it
      // was in the directory of an umbrella header, for instance), but no
      // actual module containing it exists (because the umbrella header is
      // incomplete).  Treat this as a textual inclusion.
      SuggestedModule = ModuleMap::KnownHeader();
    } else if (Imported.isConfigMismatch()) {
      // On a configuration mismatch, enter the header textually. We still know
      // that it's part of the corresponding module.
    } else {
      // We hit an error processing the import. Bail out.
      if (hadModuleLoaderFatalFailure()) {
        // With a fatal failure in the module loader, we abort parsing.
        Token &Result = IncludeTok;
        assert(CurLexer && "#include but no current lexer set!");
        Result.startToken();
        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
        CurLexer->cutOffLexing();
      }
      return {ImportAction::None};
    }
  }

  // The #included file will be considered to be a system header if either it is
  // in a system include directory, or if the #includer is a system include
  // header.
  SrcMgr::CharacteristicKind FileCharacter =
      SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
  if (File)
    FileCharacter = std::max(HeaderInfo.getFileDirFlavor(&File->getFileEntry()),
                             FileCharacter);

  // If this is a '#import' or an import-declaration, don't re-enter the file.
  //
  // FIXME: If we have a suggested module for a '#include', and we've already
  // visited this file, don't bother entering it again. We know it has no
  // further effect.
  bool EnterOnce =
      IsImportDecl ||
      IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;

  // Ask HeaderInfo if we should enter this #include file.  If not, #including
  // this file will have no effect.
  if (Action == Enter && File &&
      !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
                                         EnterOnce, getLangOpts().Modules,
                                         SuggestedModule.getModule())) {
    // Even if we've already preprocessed this header once and know that we
    // don't need to see its contents again, we still need to import it if it's
    // modular because we might not have imported it from this submodule before.
    //
    // FIXME: We don't do this when compiling a PCH because the AST
    // serialization layer can't cope with it. This means we get local
    // submodule visibility semantics wrong in that case.
    Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
  }

  if (Callbacks && !IsImportDecl) {
    // Notify the callback object that we've seen an inclusion directive.
    // FIXME: Use a different callback for a pp-import?
    Callbacks->InclusionDirective(
        HashLoc, IncludeTok, LookupFilename, isAngled, FilenameRange,
        File ? &File->getFileEntry() : nullptr, SearchPath, RelativePath,
        Action == Import ? SuggestedModule.getModule() : nullptr,
        FileCharacter);
    if (Action == Skip && File)
      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
  }

  if (!File)
    return {ImportAction::None};

  // If this is a C++20 pp-import declaration, diagnose if we didn't find any
  // module corresponding to the named header.
  if (IsImportDecl && !SuggestedModule) {
    Diag(FilenameTok, diag::err_header_import_not_header_unit)
      << OriginalFilename << File->getName();
    return {ImportAction::None};
  }

  // Issue a diagnostic if the name of the file on disk has a different case
  // than the one we're about to open.
  const bool CheckIncludePathPortability =
      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();

  if (CheckIncludePathPortability) {
    StringRef Name = LookupFilename;
    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
                                          llvm::sys::path::end(Name));

    if (trySimplifyPath(Components, RealPathName)) {
      // Rebuild the include spelling from the corrected components,
      // reusing the user's original separators and delimiters.
      SmallString<128> Path;
      Path.reserve(Name.size()+2);
      Path.push_back(isAngled ? '<' : '"');
      bool isLeadingSeparator = llvm::sys::path::is_absolute(Name);
      for (auto Component : Components) {
        if (isLeadingSeparator)
          isLeadingSeparator = false;
        else
          Path.append(Component);
        // Append the separator the user used, or the close quote
        Path.push_back(
          Path.size() <= Filename.size() ? Filename[Path.size()-1] :
            (isAngled ? '>' : '"'));
      }
      // For user files and known standard headers, by default we issue a diagnostic.
      // For other system headers, we don't. They can be controlled separately.
      auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ?
          diag::pp_nonportable_path : diag::pp_nonportable_system_path;
      Diag(FilenameTok, DiagId) << Path <<
        FixItHint::CreateReplacement(FilenameRange, Path);
    }
  }

  switch (Action) {
  case Skip:
    // If we don't need to enter the file, stop now.
    if (Module *M = SuggestedModule.getModule())
      return {ImportAction::SkippedModuleImport, M};
    return {ImportAction::None};

  case IncludeLimitReached:
    // If we reached our include limit and don't want to enter any more files,
    // don't go any further.
    return {ImportAction::None};

  case Import: {
    // If this is a module import, make it visible if needed.
    Module *M = SuggestedModule.getModule();
    assert(M && "no module to import");

    makeModuleVisible(M, EndLoc);

    // #__include_macros only needs the macros made visible, not a parser
    // annotation.
    if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
        tok::pp___include_macros)
      return {ImportAction::None};

    return {ImportAction::ModuleImport, M};
  }

  case Enter:
    break;
  }

  // Check that we don't have infinite #include recursion.
  if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
    Diag(FilenameTok, diag::err_pp_include_too_deep);
    HasReachedMaxIncludeDepth = true;
    return {ImportAction::None};
  }

  // Look up the file, create a File ID for it.
  SourceLocation IncludePos = FilenameTok.getLocation();
  // If the filename string was the result of macro expansions, set the include
  // position on the file where it will be included and after the expansions.
  if (IncludePos.isMacroID())
    IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
  FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
  assert(FID.isValid() && "Expected valid file ID");

  // If all is good, enter the new file!
  if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
    return {ImportAction::None};

  // Determine if we're switching to building a new submodule, and which one.
  if (auto *M = SuggestedModule.getModule()) {
    if (M->getTopLevelModule()->ShadowingModule) {
      // We are building a submodule that belongs to a shadowed module. This
      // means we find header files in the shadowed module. Diagnose this as
      // unsupported.
      Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
        << M->getFullModuleName();
      Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
           diag::note_previous_definition);
      return {ImportAction::None};
    }
    // When building a pch, -fmodule-name tells the compiler to textually
    // include headers in the specified module. We are not building the
    // specified module.
    //
    // FIXME: This is the wrong way to handle this. We should produce a PCH
    // that behaves the same as the header would behave in a compilation using
    // that PCH, which means we should enter the submodule. We need to teach
    // the AST serialization layer to deal with the resulting AST.
    if (getLangOpts().CompilingPCH &&
        isForModuleBuilding(M, getLangOpts().CurrentModule,
                            getLangOpts().ModuleName))
      return {ImportAction::None};

    assert(!CurLexerSubmodule && "should not have marked this as a module yet");
    CurLexerSubmodule = M;

    // Let the macro handling code know that any future macros are within
    // the new submodule.
    EnterSubmodule(M, EndLoc, /*ForPragma*/false);

    // Let the parser know that any future declarations are within the new
    // submodule.
    // FIXME: There's no point doing this if we're handling a #__include_macros
    // directive.
    return {ImportAction::ModuleBegin, M};
  }

  assert(!IsImportDecl && "failed to diagnose missing module for import decl");
  return {ImportAction::None};
}

/// HandleIncludeNextDirective - Implements \#include_next.
///
void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
                                              Token &IncludeNextTok) {
  Diag(IncludeNextTok, diag::ext_pp_include_next_directive);

  // #include_next is like #include, except that we start searching after
  // the current found directory.  If we can't do this, issue a
  // diagnostic.
  const DirectoryLookup *Lookup = CurDirLookup;
  const FileEntry *LookupFromFile = nullptr;
  if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
    // If the main file is a header, then it's either for PCH/AST generation,
    // or libclang opened it. Either way, handle it as a normal include below
    // and do not complain about include_next.
  } else if (isInPrimaryFile()) {
    Lookup = nullptr;
    Diag(IncludeNextTok, diag::pp_include_next_in_primary);
  } else if (CurLexerSubmodule) {
    // Start looking up in the directory *after* the one in which the current
    // file would be found, if any.
    assert(CurPPLexer && "#include_next directive in macro?");
    LookupFromFile = CurPPLexer->getFileEntry();
    Lookup = nullptr;
  } else if (!Lookup) {
    // The current file was not found by walking the include path. Either it
    // is the primary file (handled above), or it was found by absolute path,
    // or it was found relative to such a file.
    // FIXME: Track enough information so we know which case we're in.
    Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
  } else {
    // Start looking up in the next directory.
    ++Lookup;
  }

  return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
                                LookupFromFile);
}

/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
  // The Microsoft #import directive takes a type library and generates header
  // files from it, and includes those.  This is beyond the scope of what clang
  // does, so we ignore it and error out.  However, #import can optionally have
  // trailing attributes that span multiple lines.  We're going to eat those
  // so we can continue processing from there.
  Diag(Tok, diag::err_pp_import_directive_ms );

  // Read tokens until we get to the end of the directive.  Note that the
  // directive can be split over multiple lines using the backslash character.
  DiscardUntilEndOfDirective();
}

/// HandleImportDirective - Implements \#import.
///
void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
                                         Token &ImportTok) {
  if (!LangOpts.ObjC) {  // #import is standard for ObjC.
    if (LangOpts.MSVCCompat)
      return HandleMicrosoftImportDirective(ImportTok);
    Diag(ImportTok, diag::ext_pp_import_directive);
  }
  // Otherwise #import behaves like #include (with enter-once semantics
  // handled downstream).
  return HandleIncludeDirective(HashLoc, ImportTok);
}

/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
/// pseudo directive in the predefines buffer.  This handles it by sucking all
/// tokens through the preprocessor and discarding them (only keeping the side
/// effects on the preprocessor).
void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
                                                Token &IncludeMacrosTok) {
  // This directive should only occur in the predefines buffer.  If not, emit an
  // error and reject it.
  SourceLocation Loc = IncludeMacrosTok.getLocation();
  if (SourceMgr.getBufferName(Loc) != "<built-in>") {
    Diag(IncludeMacrosTok.getLocation(),
         diag::pp_include_macros_out_of_predefines);
    DiscardUntilEndOfDirective();
    return;
  }

  // Treat this as a normal #include for checking purposes.  If this is
  // successful, it will push a new lexer onto the include stack.
  HandleIncludeDirective(HashLoc, IncludeMacrosTok);

  // Drain the included file entirely, keeping only macro-definition side
  // effects; tok::hashhash marks the end of the -imacros inclusion.
  Token TmpTok;
  do {
    Lex(TmpTok);
    assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
  } while (TmpTok.isNot(tok::hashhash));
}

//===----------------------------------------------------------------------===//
// Preprocessor Macro Directive Handling.
//===----------------------------------------------------------------------===//

/// ReadMacroParameterList - The ( starting a parameter list of a macro
/// definition has just been read.  Lex the rest of the parameters and the
/// closing ), updating MI with what we learn.  Return true if an error occurs
/// parsing the param list.
bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
  SmallVector<IdentifierInfo*, 32> Parameters;

  // Loop reading one parameter per iteration; every return path either reports
  // an error (true) or commits the list to MI via setParameterList (false).
  while (true) {
    LexUnexpandedToken(Tok);
    switch (Tok.getKind()) {
    case tok::r_paren:
      // Found the end of the parameter list.
      if (Parameters.empty())  // #define FOO()
        return false;
      // Otherwise we have #define FOO(A,)
      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
      return true;
    case tok::ellipsis:  // #define X(... -> C99 varargs
      if (!LangOpts.C99)
        Diag(Tok, LangOpts.CPlusPlus11 ?
             diag::warn_cxx98_compat_variadic_macro :
             diag::ext_variadic_macro);

      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
      if (LangOpts.OpenCL) {
        Diag(Tok, diag::ext_pp_opencl_variadic_macros);
      }

      // Lex the token after the identifier.
      LexUnexpandedToken(Tok);
      if (Tok.isNot(tok::r_paren)) {
        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
        return true;
      }
      // Add the __VA_ARGS__ identifier as a parameter.
      Parameters.push_back(Ident__VA_ARGS__);
      MI->setIsC99Varargs();
      MI->setParameterList(Parameters, BP);
      return false;
    case tok::eod:  // #define X(
      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
      return true;
    default:
      // Handle keywords and identifiers here to accept things like
      // #define Foo(for) for.
      IdentifierInfo *II = Tok.getIdentifierInfo();
      if (!II) {
        // #define X(1
        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
        return true;
      }

      // If this is already used as a parameter, it is used multiple times (e.g.
      // #define X(A,A.
      if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6
        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
        return true;
      }

      // Add the parameter to the macro info.
      Parameters.push_back(II);

      // Lex the token after the identifier.
      LexUnexpandedToken(Tok);

      switch (Tok.getKind()) {
      default:          // #define X(A B
        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
        return true;
      case tok::r_paren: // #define X(A)
        MI->setParameterList(Parameters, BP);
        return false;
      case tok::comma:  // #define X(A,
        break;
      case tok::ellipsis:  // #define X(A... -> GCC extension
        // Diagnose extension.
        Diag(Tok, diag::ext_named_variadic_macro);

        // Lex the token after the identifier.
        LexUnexpandedToken(Tok);
        if (Tok.isNot(tok::r_paren)) {
          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
          return true;
        }

        MI->setIsGNUVarargs();
        MI->setParameterList(Parameters, BP);
        return false;
      }
    }
  }
}

/// Returns true if the macro definition looks like a keyword-remapping
/// "configuration" idiom (e.g. '#define inline __inline'), in which case
/// redefining a keyword should not be diagnosed.
static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
                                   const LangOptions &LOptions) {
  if (MI->getNumTokens() == 1) {
    const Token &Value = MI->getReplacementToken(0);

    // Macro that is identity, like '#define inline inline' is a valid pattern.
    if (MacroName.getKind() == Value.getKind())
      return true;

    // Macro that maps a keyword to the same keyword decorated with leading/
    // trailing underscores is a valid pattern:
    //    #define inline __inline
    //    #define inline __inline__
    //    #define inline _inline (in MS compatibility mode)
    StringRef MacroText = MacroName.getIdentifierInfo()->getName();
    if (IdentifierInfo *II = Value.getIdentifierInfo()) {
      if (!II->isKeyword(LOptions))
        return false;
      StringRef ValueText = II->getName();
      StringRef TrimmedValue = ValueText;
      // Strip one leading '_' or a leading '__' (plus a matching trailing
      // '__'); the remainder must spell the macro name exactly.
      if (!ValueText.startswith("__")) {
        if (ValueText.startswith("_"))
          TrimmedValue = TrimmedValue.drop_front(1);
        else
          return false;
      } else {
        TrimmedValue = TrimmedValue.drop_front(2);
        if (TrimmedValue.endswith("__"))
          TrimmedValue = TrimmedValue.drop_back(2);
      }
      return TrimmedValue.equals(MacroText);
    } else {
      return false;
    }
  }

  // #define inline
  return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
                           tok::kw_const) &&
         MI->getNumTokens() == 0;
}

// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
// entire line) of the macro's tokens and adds them to MacroInfo, and while
// doing so performs certain validity checks including (but not limited to):
//   - # (stringization) is followed by a macro parameter
//
//  Returns a nullptr if an invalid sequence of tokens is encountered or returns
//  a pointer to a MacroInfo object.

MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {

  // LastTok tracks the final token consumed so the definition end location can
  // be recorded once the whole line is read.
  Token LastTok = MacroNameTok;
  // Create the new macro.
  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());

  Token Tok;
  LexUnexpandedToken(Tok);

  // Ensure we consume the rest of the macro body if errors occur.
  auto _ = llvm::make_scope_exit([&]() {
    // The flag indicates if we are still waiting for 'eod'.
    if (CurLexer->ParsingPreprocessorDirective)
      DiscardUntilEndOfDirective();
  });

  // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
  // within their appropriate context.
  VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);

  // If this is a function-like macro definition, parse the argument list,
  // marking each of the identifiers as being used as macro arguments.  Also,
  // check other constraints on the first token of the macro body.
  if (Tok.is(tok::eod)) {
    if (ImmediatelyAfterHeaderGuard) {
      // Save this macro information since it may part of a header guard.
      CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
                                        MacroNameTok.getLocation());
    }
    // If there is no body to this macro, we have no special handling here.
  } else if (Tok.hasLeadingSpace()) {
    // This is a normal token with leading space.  Clear the leading space
    // marker on the first token to get proper expansion.
    Tok.clearFlag(Token::LeadingSpace);
  } else if (Tok.is(tok::l_paren)) {
    // This is a function-like macro definition.  Read the argument list.
    MI->setIsFunctionLike();
    if (ReadMacroParameterList(MI, LastTok))
      return nullptr;

    // If this is a definition of an ISO C/C++ variadic function-like macro (not
    // using the GNU named varargs extension) inform our variadic scope guard
    // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
    // allowed only within the definition of a variadic macro.

    if (MI->isC99Varargs()) {
      VariadicMacroScopeGuard.enterScope();
    }

    // Read the first token after the arg list for down below.
    LexUnexpandedToken(Tok);
  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
    // C99 requires whitespace between the macro definition and the body.  Emit
    // a diagnostic for something like "#define X+".
    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
  } else {
    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
    // first character of a replacement list is not a character required by
    // subclause 5.2.1, then there shall be white-space separation between the
    // identifier and the replacement list.".  5.2.1 lists this set:
    //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
    // is irrelevant here.
    bool isInvalid = false;
    if (Tok.is(tok::at)) // @ is not in the list above.
      isInvalid = true;
    else if (Tok.is(tok::unknown)) {
      // If we have an unknown token, it is something strange like "`".  Since
      // all of valid characters would have lexed into a single character
      // token of some sort, we know this is not a valid case.
      isInvalid = true;
    }
    if (isInvalid)
      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
    else
      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
  }

  if (!Tok.is(tok::eod))
    LastTok = Tok;

  // Read the rest of the macro body.
  if (MI->isObjectLike()) {
    // Object-like macros are very simple, just read their body.
    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;
      MI->AddTokenToBody(Tok);
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }
  } else {
    // Otherwise, read the body of a function-like macro.  While we are at it,
    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
    // parameters in function-like macro expansions.

    VAOptDefinitionContext VAOCtx(*this);

    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;

      // Ordinary (non-#/#@/##) tokens: append them, with extra validation when
      // they are part of a __VA_OPT__(...) construct.
      if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
        MI->AddTokenToBody(Tok);

        if (VAOCtx.isVAOptToken(Tok)) {
          // If we're already within a VAOPT, emit an error.
          if (VAOCtx.isInVAOpt()) {
            Diag(Tok, diag::err_pp_vaopt_nested_use);
            return nullptr;
          }
          // Ensure VAOPT is followed by a '(' .
          LexUnexpandedToken(Tok);
          if (Tok.isNot(tok::l_paren)) {
            Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
            return nullptr;
          }
          MI->AddTokenToBody(Tok);
          VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
          LexUnexpandedToken(Tok);
          // '##' may not appear immediately after '__VA_OPT__('.
          if (Tok.is(tok::hashhash)) {
            Diag(Tok, diag::err_vaopt_paste_at_start);
            return nullptr;
          }
          continue;
        } else if (VAOCtx.isInVAOpt()) {
          if (Tok.is(tok::r_paren)) {
            if (VAOCtx.sawClosingParen()) {
              // '##' may not appear immediately before the closing ')' of
              // __VA_OPT__ either.
              const unsigned NumTokens = MI->getNumTokens();
              assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
                                       "and a subsequent tok::r_paren");
              if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
                Diag(Tok, diag::err_vaopt_paste_at_end);
                return nullptr;
              }
            }
          } else if (Tok.is(tok::l_paren)) {
            VAOCtx.sawOpeningParen(Tok.getLocation());
          }
        }
        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      // If we're in -traditional mode, then we should ignore stringification
      // and token pasting. Mark the tokens as unknown so as not to confuse
      // things.
      if (getLangOpts().TraditionalCPP) {
        Tok.setKind(tok::unknown);
        MI->AddTokenToBody(Tok);

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      if (Tok.is(tok::hashhash)) {
        // If we see token pasting, check if it looks like the gcc comma
        // pasting extension.  We'll use this information to suppress
        // diagnostics later on.

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);

        // A trailing '##' before end-of-directive: append it here and let the
        // paste-at-end check in HandleDefineDirective diagnose it.
        if (Tok.is(tok::eod)) {
          MI->AddTokenToBody(LastTok);
          break;
        }

        unsigned NumTokens = MI->getNumTokens();
        if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
            MI->getReplacementToken(NumTokens-1).is(tok::comma))
          MI->setHasCommaPasting();

        // Things look ok, add the '##' token to the macro.
        MI->AddTokenToBody(LastTok);
        continue;
      }

      // Our Token is a stringization operator.
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);

      // Check for a valid macro arg identifier or __VA_OPT__.
      if (!VAOCtx.isVAOptToken(Tok) &&
          (Tok.getIdentifierInfo() == nullptr ||
           MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {

        // If this is assembler-with-cpp mode, we accept random gibberish after
        // the '#' because '#' is often a comment character.  However, change
        // the kind of the token to tok::unknown so that the preprocessor isn't
        // confused.
        if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
          LastTok.setKind(tok::unknown);
          MI->AddTokenToBody(LastTok);
          continue;
        } else {
          Diag(Tok, diag::err_pp_stringize_not_parameter)
            << LastTok.is(tok::hashat);
          return nullptr;
        }
      }

      // Things look ok, add the '#' and param name tokens to the macro.
      MI->AddTokenToBody(LastTok);

      // If the token following '#' is VAOPT, let the next iteration handle it
      // and check it for correctness, otherwise add the token and prime the
      // loop with the next one.
      if (!VAOCtx.isVAOptToken(Tok)) {
        MI->AddTokenToBody(Tok);
        LastTok = Tok;

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
      }
    }
    // Reaching eod with an open __VA_OPT__( is an unterminated-construct
    // error.
    if (VAOCtx.isInVAOpt()) {
      assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
      Diag(Tok, diag::err_pp_expected_after)
        << LastTok.getKind() << tok::r_paren;
      Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
      return nullptr;
    }
  }
  MI->setDefinitionEndLoc(LastTok.getLocation());
  return MI;
}

/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
/// line then lets the caller lex the next real token.
void Preprocessor::HandleDefineDirective(
    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
  ++NumDefined;

  Token MacroNameTok;
  bool MacroShadowsKeyword;
  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // If we are supposed to keep comments in #defines, reenable comment saving
  // mode.
  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);

  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
      MacroNameTok, ImmediatelyAfterHeaderGuard);

  if (!MI) return;

  // Warn when a macro redefines a keyword, unless it matches a recognized
  // configuration idiom such as '#define inline __inline'.
  if (MacroShadowsKeyword &&
      !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
    Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
  }
  // Check that there is no paste (##) operator at the beginning or end of the
  // replacement list.
  unsigned NumTokens = MI->getNumTokens();
  if (NumTokens != 0) {
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
      return;
    }
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
      return;
    }
  }

  // When skipping just warn about macros that do not match.
  if (SkippingUntilPCHThroughHeader) {
    const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
    if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt))
      Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
          << MacroNameTok.getIdentifierInfo();
    return;
  }

  // Finally, if this identifier already had a macro defined for it, verify that
  // the macro bodies are identical, and issue diagnostics if they are not.
  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
    // In Objective-C, ignore attempts to directly redefine the builtin
    // definitions of the ownership qualifiers.  It's still possible to
    // #undef them.
    auto isObjCProtectedMacro = [](const IdentifierInfo *II) -> bool {
      return II->isStr("__strong") ||
             II->isStr("__weak") ||
             II->isStr("__unsafe_unretained") ||
             II->isStr("__autoreleasing");
    };
    // Only protect the qualifiers when the prior definition came from the
    // predefines buffer; a user-provided earlier definition is handled below.
    if (getLangOpts().ObjC &&
        SourceMgr.getFileID(OtherMI->getDefinitionLoc())
          == getPredefinesFileID() &&
        isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
      // Warn if it changes the tokens.
      if ((!getDiagnostics().getSuppressSystemWarnings() ||
           !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
          !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
      }
      assert(!OtherMI->isWarnIfUnused());
      return;
    }

    // It is very common for system headers to have tons of macro redefinitions
    // and for warnings to be disabled in system headers.  If this is the case,
    // then don't bother calling MacroInfo::isIdenticalTo.
    if (!getDiagnostics().getSuppressSystemWarnings() ||
        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
      if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
      // C++ [cpp.predefined]p4, but allow it as an extension.
      if (OtherMI->isBuiltinMacro())
        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
      // Macros must be identical.  This means all tokens and whitespace
      // separation must be the same.  C99 6.10.3p2.
      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
          << MacroNameTok.getIdentifierInfo();
        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
      }
    }
    // The old definition is being replaced; it can no longer trigger an
    // "unused macro" warning.
    if (OtherMI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
  }

  DefMacroDirective *MD =
      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);

  assert(!MI->isUsed());
  // If we need warning for not using the macro, add its location in the
  // warn-because-unused-macro set. If it gets used it will be removed from set.
  if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
      !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
      !MacroExpansionInDirectivesOverride) {
    MI->setIsWarnIfUnused(true);
    WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
  }

  // If the callbacks want to know, tell them about the macro definition.
  if (Callbacks)
    Callbacks->MacroDefined(MacroNameTok, MD);
}

/// HandleUndefDirective - Implements \#undef.
///
void Preprocessor::HandleUndefDirective() {
  ++NumUndefined;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok, MU_Undef);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // Check to see if this is the last token on the #undef line.
  CheckEndOfDirective("undef");

  // Okay, we have a valid identifier to undef.
  auto *II = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(II);
  UndefMacroDirective *Undef = nullptr;

  // If the macro is not defined, this is a noop undef.
  if (const MacroInfo *MI = MD.getMacroInfo()) {
    // Diagnose a macro that was never expanded before being #undef'd.
    if (!MI->isUsed() && MI->isWarnIfUnused())
      Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);

    if (MI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());

    Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
  }

  // If the callbacks want to know, tell them about the macro #undef.
  // Note: no matter if the macro was defined or not.
  if (Callbacks)
    Callbacks->MacroUndefined(MacroNameTok, MD, Undef);

  if (Undef)
    appendMacroDirective(II, Undef);
}

//===----------------------------------------------------------------------===//
// Preprocessor Conditional Directive Handling.
//===----------------------------------------------------------------------===//

/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
/// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
/// true if any tokens have been returned or pp-directives activated before this
/// \#ifndef has been lexed.
///
void Preprocessor::HandleIfdefDirective(Token &Result,
                                        const Token &HashToken,
                                        bool isIfndef,
                                        bool ReadAnyTokensBeforeDirective) {
  ++NumIf;
  Token DirectiveTok = Result;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod)) {
    // Skip code until we get to #endif.  This helps with recovery by not
    // emitting an error when the #endif is reached.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false, /*FoundElse*/ false);
    return;
  }

  // Check to see if this is the last token on the #if[n]def line.
  CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");

  IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(MII);
  MacroInfo *MI = MD.getMacroInfo();

  if (CurPPLexer->getConditionalStackDepth() == 0) {
    // If the start of a top-level #ifdef and if the macro is not defined,
    // inform MIOpt that this might be the start of a proper include guard.
    // Otherwise it is some other form of unknown conditional which we can't
    // handle.
    if (!ReadAnyTokensBeforeDirective && !MI) {
      assert(isIfndef && "#ifdef shouldn't reach here");
      CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
    } else
      CurPPLexer->MIOpt.EnterTopLevelConditional();
  }

  // If there is a macro, process it.
  if (MI)  // Mark it used.
    markMacroAsUsed(MI);

  if (Callbacks) {
    if (isIfndef)
      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
    else
      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
  }

  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(DirectiveTok.getLocation());

  // Should we include the stuff contained by this directive?
  if (PPOpts->SingleFileParseMode && !MI) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/false,
                                     /*foundelse*/false);
  } else if (!MI == isIfndef || RetainExcludedCB) {
    // The condition is true when the macro's defined-ness matches the
    // directive (#ifdef + defined, #ifndef + not defined).
    // Yes, remember that we are inside a conditional, then lex the next token.
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/true,
                                     /*foundelse*/false);
  } else {
    // No, skip the contents of this block.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false,
                                 /*FoundElse*/ false);
  }
}

/// HandleIfDirective - Implements the \#if directive.
///
void Preprocessor::HandleIfDirective(Token &IfToken,
                                     const Token &HashToken,
                                     bool ReadAnyTokensBeforeDirective) {
  ++NumIf;

  // Parse and evaluate the conditional expression.
  IdentifierInfo *IfNDefMacro = nullptr;
  const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
  const bool ConditionalTrue = DER.Conditional;

  // If this condition is equivalent to #ifndef X, and if this is the first
  // directive seen, handle it for the multiple-include optimization.
  if (CurPPLexer->getConditionalStackDepth() == 0) {
    if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
      // FIXME: Pass in the location of the macro name, not the 'if' token.
      CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
    else
      CurPPLexer->MIOpt.EnterTopLevelConditional();
  }

  if (Callbacks)
    Callbacks->If(
        IfToken.getLocation(), DER.ExprRange,
        (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));

  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(IfToken.getLocation());

  // Should we include the stuff contained by this directive?
  if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
                                     /*foundnonskip*/false, /*foundelse*/false);
  } else if (ConditionalTrue || RetainExcludedCB) {
    // Yes, remember that we are inside a conditional, then lex the next token.
    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
                                   /*foundnonskip*/true, /*foundelse*/false);
  } else {
    // No, skip the contents of this block.
    SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
                                 /*Foundnonskip*/ false,
                                 /*FoundElse*/ false);
  }
}

/// HandleEndifDirective - Implements the \#endif directive.
///
void Preprocessor::HandleEndifDirective(Token &EndifToken) {
  ++NumEndif;

  // Check that this is the whole directive.
  CheckEndOfDirective("endif");

  PPConditionalInfo CondInfo;
  if (CurPPLexer->popConditionalLevel(CondInfo)) {
    // No conditionals on the stack: this is an #endif without an #if.
    Diag(EndifToken, diag::err_pp_endif_without_if);
    return;
  }

  // If this the end of a top-level #endif, inform MIOpt.
  if (CurPPLexer->getConditionalStackDepth() == 0)
    CurPPLexer->MIOpt.ExitTopLevelConditional();

  assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
         "This code should only be reachable in the non-skipping case!");

  if (Callbacks)
    Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
}

/// HandleElseDirective - Implements the \#else directive.
///
void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
  ++NumElse;

  // #else directive in a non-skipping conditional... start skipping.
  CheckEndOfDirective("else");

  PPConditionalInfo CI;
  if (CurPPLexer->popConditionalLevel(CI)) {
    Diag(Result, diag::pp_err_else_without_if);
    return;
  }

  // If this is a top-level #else, inform the MIOpt.
  if (CurPPLexer->getConditionalStackDepth() == 0)
    CurPPLexer->MIOpt.EnterTopLevelConditional();

  // If this is a #else with a #else before it, report the error.
  if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);

  if (Callbacks)
    Callbacks->Else(Result.getLocation(), CI.IfLoc);

  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(Result.getLocation());

  if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
                                     /*foundnonskip*/false, /*foundelse*/true);
    return;
  }

  // Finally, skip the rest of the contents of this block.
  SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
                               /*Foundnonskip*/ true,
                               /*FoundElse*/ true, Result.getLocation());
}

/// HandleElifDirective - Implements the \#elif directive.
///
void Preprocessor::HandleElifDirective(Token &ElifToken,
                                       const Token &HashToken) {
  // NOTE: #elif is counted together with #else here, not with #if.
  ++NumElse;

  // #elif directive in a non-skipping conditional... start skipping.
  // We don't care what the condition is, because we will always skip it (since
  // the block immediately before it was included).
  SourceRange ConditionRange = DiscardUntilEndOfDirective();

  PPConditionalInfo CI;
  if (CurPPLexer->popConditionalLevel(CI)) {
    Diag(ElifToken, diag::pp_err_elif_without_if);
    return;
  }

  // If this is a top-level #elif, inform the MIOpt.
  if (CurPPLexer->getConditionalStackDepth() == 0)
    CurPPLexer->MIOpt.EnterTopLevelConditional();

  // If this is a #elif with a #else before it, report the error.
  if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);

  if (Callbacks)
    Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
                    PPCallbacks::CVK_NotEvaluated, CI.IfLoc);

  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(ElifToken.getLocation());

  if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
                                     /*foundnonskip*/false, /*foundelse*/false);
    return;
  }

  // Finally, skip the rest of the contents of this block.
  SkipExcludedConditionalBlock(
      HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
      /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
}
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
new file mode 100644
index 000000000000..e5ec2b99f507
--- /dev/null
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -0,0 +1,899 @@
//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Preprocessor::EvaluateDirectiveExpression method,
// which parses and evaluates integer constant expressions for #if directives.
//
//===----------------------------------------------------------------------===//
//
// FIXME: implement testing for #assert's.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" +#include <cassert> + +using namespace clang; + +namespace { + +/// PPValue - Represents the value of a subexpression of a preprocessor +/// conditional and the source range covered by it. +class PPValue { +  SourceRange Range; +  IdentifierInfo *II; + +public: +  llvm::APSInt Val; + +  // Default ctor - Construct an 'invalid' PPValue. +  PPValue(unsigned BitWidth) : Val(BitWidth) {} + +  // If this value was produced by directly evaluating an identifier, produce +  // that identifier. 
+  IdentifierInfo *getIdentifier() const { return II; } +  void setIdentifier(IdentifierInfo *II) { this->II = II; } + +  unsigned getBitWidth() const { return Val.getBitWidth(); } +  bool isUnsigned() const { return Val.isUnsigned(); } + +  SourceRange getRange() const { return Range; } + +  void setRange(SourceLocation L) { Range.setBegin(L); Range.setEnd(L); } +  void setRange(SourceLocation B, SourceLocation E) { +    Range.setBegin(B); Range.setEnd(E); +  } +  void setBegin(SourceLocation L) { Range.setBegin(L); } +  void setEnd(SourceLocation L) { Range.setEnd(L); } +}; + +} // end anonymous namespace + +static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, +                                     Token &PeekTok, bool ValueLive, +                                     bool &IncludedUndefinedIds, +                                     Preprocessor &PP); + +/// DefinedTracker - This struct is used while parsing expressions to keep track +/// of whether !defined(X) has been seen. +/// +/// With this simple scheme, we handle the basic forms: +///    !defined(X)   and !defined X +/// but we also trivially handle (silly) stuff like: +///    !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)). +struct DefinedTracker { +  /// Each time a Value is evaluated, it returns information about whether the +  /// parsed value is of the form defined(X), !defined(X) or is something else. +  enum TrackerState { +    DefinedMacro,        // defined(X) +    NotDefinedMacro,     // !defined(X) +    Unknown              // Something else. +  } State; +  /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this +  /// indicates the macro that was checked. +  IdentifierInfo *TheMacro; +  bool IncludedUndefinedIds = false; +}; + +/// EvaluateDefined - Process a 'defined(sym)' expression. 
static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                            bool ValueLive, Preprocessor &PP) {
  SourceLocation beginLoc(PeekTok.getLocation());
  Result.setBegin(beginLoc);

  // Get the next token, don't expand it.
  PP.LexUnexpandedNonComment(PeekTok);

  // Two options, it can either be a pp-identifier or a (.
  SourceLocation LParenLoc;
  if (PeekTok.is(tok::l_paren)) {
    // Found a paren, remember we saw it and skip it.
    LParenLoc = PeekTok.getLocation();
    PP.LexUnexpandedNonComment(PeekTok);
  }

  if (PeekTok.is(tok::code_completion)) {
    if (PP.getCodeCompletionHandler())
      PP.getCodeCompletionHandler()->CodeCompleteMacroName(false);
    PP.setCodeCompletionReached();
    PP.LexUnexpandedNonComment(PeekTok);
  }

  // If we don't have a pp-identifier now, this is an error.
  if (PP.CheckMacroName(PeekTok, MU_Other))
    return true;

  // Otherwise, we got an identifier, is it defined to something?
  IdentifierInfo *II = PeekTok.getIdentifierInfo();
  MacroDefinition Macro = PP.getMacroDefinition(II);
  Result.Val = !!Macro;
  Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
  DT.IncludedUndefinedIds = !Macro;

  // If there is a macro, mark it used.
  if (Result.Val != 0 && ValueLive)
    PP.markMacroAsUsed(Macro.getMacroInfo());

  // Save macro token for callback.
  Token macroToken(PeekTok);

  // If we are in parens, ensure we have a trailing ).
  if (LParenLoc.isValid()) {
    // Consume identifier.
    Result.setEnd(PeekTok.getLocation());
    PP.LexUnexpandedNonComment(PeekTok);

    if (PeekTok.isNot(tok::r_paren)) {
      PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_after)
          << "'defined'" << tok::r_paren;
      PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
      return true;
    }
    // Consume the ).  Note the switch back to *expanding* lexing: the token
    // after the defined-expression is ordinary expression text again.
    PP.LexNonComment(PeekTok);
    Result.setEnd(PeekTok.getLocation());
  } else {
    // Consume identifier.
    Result.setEnd(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
  }

  // [cpp.cond]p4:
  //   Prior to evaluation, macro invocations in the list of preprocessing
  //   tokens that will become the controlling constant expression are replaced
  //   (except for those macro names modified by the 'defined' unary operator),
  //   just as in normal text. If the token 'defined' is generated as a result
  //   of this replacement process or use of the 'defined' unary operator does
  //   not match one of the two specified forms prior to macro replacement, the
  //   behavior is undefined.
  // This isn't an idle threat, consider this program:
  //   #define FOO
  //   #define BAR defined(FOO)
  //   #if BAR
  //   ...
  //   #else
  //   ...
  //   #endif
  // clang and gcc will pick the #if branch while Visual Studio will take the
  // #else branch.  Emit a warning about this undefined behavior.
  if (beginLoc.isMacroID()) {
    bool IsFunctionTypeMacro =
        PP.getSourceManager()
            .getSLocEntry(PP.getSourceManager().getFileID(beginLoc))
            .getExpansion()
            .isFunctionMacroExpansion();
    // For object-type macros, it's easy to replace
    //   #define FOO defined(BAR)
    // with
    //   #if defined(BAR)
    //   #define FOO 1
    //   #else
    //   #define FOO 0
    //   #endif
    // and doing so makes sense since compilers handle this differently in
    // practice (see example further up).  But for function-type macros,
    // there is no good way to write
    //   # define FOO(x) (defined(M_ ## x) && M_ ## x)
    // in a different way, and compilers seem to agree on how to behave here.
    // So warn by default on object-type macros, but only warn in -pedantic
    // mode on function-type macros.
    if (IsFunctionTypeMacro)
      PP.Diag(beginLoc, diag::warn_defined_in_function_type_macro);
    else
      PP.Diag(beginLoc, diag::warn_defined_in_object_type_macro);
  }

  // Invoke the 'defined' callback.
  if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
    Callbacks->Defined(macroToken, Macro,
                       SourceRange(beginLoc, PeekTok.getLocation()));
  }

  // Success, remember that we saw defined(X).
  DT.State = DefinedTracker::DefinedMacro;
  DT.TheMacro = II;
  return false;
}

/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
/// return the computed value in Result.  Return true if there was an error
/// parsing.  This function also returns information about the form of the
/// expression in DT.  See above for information on what DT means.
///
/// If ValueLive is false, then this value is being evaluated in a context where
/// the result is not used.  As such, avoid diagnostics that relate to
/// evaluation.
static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                          bool ValueLive, Preprocessor &PP) {
  DT.State = DefinedTracker::Unknown;

  Result.setIdentifier(nullptr);

  if (PeekTok.is(tok::code_completion)) {
    if (PP.getCodeCompletionHandler())
      PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
    PP.setCodeCompletionReached();
    PP.LexNonComment(PeekTok);
  }

  // Dispatch on the kind of primary expression that starts at PeekTok.
  switch (PeekTok.getKind()) {
  default:
    // If this token's spelling is a pp-identifier, check to see if it is
    // 'defined' or if it is a macro.  Note that we check here because many
    // keywords are pp-identifiers, so we can't check the kind.
    if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
      // Handle "defined X" and "defined(X)".
      if (II->isStr("defined"))
        return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);

      if (!II->isCPlusPlusOperatorKeyword()) {
        // If this identifier isn't 'defined' or one of the special
        // preprocessor keywords and it wasn't macro expanded, it turns
        // into a simple 0
        if (ValueLive)
          PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
        Result.Val = 0;
        Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
        Result.setIdentifier(II);
        Result.setRange(PeekTok.getLocation());
        DT.IncludedUndefinedIds = true;
        PP.LexNonComment(PeekTok);
        return false;
      }
    }
    PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
    return true;
  case tok::eod:
  case tok::r_paren:
    // If there is no expression, report and exit.
    PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
    return true;
  case tok::numeric_constant: {
    SmallString<64> IntegerBuffer;
    bool NumberInvalid = false;
    StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer,
                                              &NumberInvalid);
    if (NumberInvalid)
      return true; // a diagnostic was already reported

    NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP);
    if (Literal.hadError)
      return true; // a diagnostic was already reported.

    if (Literal.isFloatingLiteral() || Literal.isImaginary) {
      PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
      return true;
    }
    assert(Literal.isIntegerLiteral() && "Unknown ppnumber");

    // Complain about, and drop, any ud-suffix.
    if (Literal.hasUDSuffix())
      PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*integer*/1;

    // 'long long' is a C99 or C++11 feature.
    if (!PP.getLangOpts().C99 && Literal.isLongLong) {
      if (PP.getLangOpts().CPlusPlus)
        PP.Diag(PeekTok,
             PP.getLangOpts().CPlusPlus11 ?
             diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
      else
        PP.Diag(PeekTok, diag::ext_c99_longlong);
    }

    // Parse the integer literal into Result.
    if (Literal.GetIntegerValue(Result.Val)) {
      // Overflow parsing integer literal: value doesn't fit even in
      // uintmax_t, so saturate as unsigned and diagnose if live.
      if (ValueLive)
        PP.Diag(PeekTok, diag::err_integer_literal_too_large)
            << /* Unsigned */ 1;
      Result.Val.setIsUnsigned(true);
    } else {
      // Set the signedness of the result to match whether there was a U suffix
      // or not.
      Result.Val.setIsUnsigned(Literal.isUnsigned);

      // Detect overflow based on whether the value is signed.  If signed
      // and if the value is too large, emit a warning "integer constant is so
      // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
      // is 64-bits.
      if (!Literal.isUnsigned && Result.Val.isNegative()) {
        // Octal, hexadecimal, and binary literals are implicitly unsigned if
        // the value does not fit into a signed integer type.
        if (ValueLive && Literal.getRadix() == 10)
          PP.Diag(PeekTok, diag::ext_integer_literal_too_large_for_signed);
        Result.Val.setIsUnsigned(true);
      }
    }

    // Consume the token.
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;
  }
  case tok::char_constant:          // 'x'
  case tok::wide_char_constant:     // L'x'
  case tok::utf8_char_constant:     // u8'x'
  case tok::utf16_char_constant:    // u'x'
  case tok::utf32_char_constant: {  // U'x'
    // Complain about, and drop, any ud-suffix.
    if (PeekTok.hasUDSuffix())
      PP.Diag(PeekTok, diag::err_pp_invalid_udl) << /*character*/0;

    SmallString<32> CharBuffer;
    bool CharInvalid = false;
    StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
    if (CharInvalid)
      return true;

    CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                              PeekTok.getLocation(), PP, PeekTok.getKind());
    if (Literal.hadError())
      return true;  // A diagnostic was already emitted.

    // Character literals are always int or wchar_t, expand to intmax_t.
    const TargetInfo &TI = PP.getTargetInfo();
    unsigned NumBits;
    if (Literal.isMultiChar())
      NumBits = TI.getIntWidth();
    else if (Literal.isWide())
      NumBits = TI.getWCharWidth();
    else if (Literal.isUTF16())
      NumBits = TI.getChar16Width();
    else if (Literal.isUTF32())
      NumBits = TI.getChar32Width();
    else // char or char8_t
      NumBits = TI.getCharWidth();

    // Set the width.
    llvm::APSInt Val(NumBits);
    // Set the value.
    Val = Literal.getValue();
    // Set the signedness. UTF-16 and UTF-32 are always unsigned
    if (Literal.isWide())
      Val.setIsUnsigned(!TargetInfo::isTypeSigned(TI.getWCharType()));
    else if (!Literal.isUTF16() && !Literal.isUTF32())
      Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);

    // Widen the character value to the expression's intmax_t width,
    // sign- or zero-extending per the signedness chosen above.
    if (Result.Val.getBitWidth() > Val.getBitWidth()) {
      Result.Val = Val.extend(Result.Val.getBitWidth());
    } else {
      assert(Result.Val.getBitWidth() == Val.getBitWidth() &&
             "intmax_t smaller than char/wchar_t?");
      Result.Val = Val;
    }

    // Consume the token.
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;
  }
  case tok::l_paren: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);  // Eat the (.
    // Parse the value and if there are any binary operators involved, parse
    // them.
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;

    // If this is a silly value like (X), which doesn't need parens, check for
    // !(defined X).
    if (PeekTok.is(tok::r_paren)) {
      // Just use DT unmodified as our result.
    } else {
      // Otherwise, we have something like (x+y), and we consumed '(x'.
      if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive,
                                   DT.IncludedUndefinedIds, PP))
        return true;

      if (PeekTok.isNot(tok::r_paren)) {
        PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen)
          << Result.getRange();
        PP.Diag(Start, diag::note_matching) << tok::l_paren;
        return true;
      }
      DT.State = DefinedTracker::Unknown;
    }
    Result.setRange(Start, PeekTok.getLocation());
    Result.setIdentifier(nullptr);
    PP.LexNonComment(PeekTok);  // Eat the ).
    return false;
  }
  case tok::plus: {
    SourceLocation Start = PeekTok.getLocation();
    // Unary plus doesn't modify the value.
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);
    Result.setIdentifier(nullptr);
    return false;
  }
  case tok::minus: {
    SourceLocation Loc = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Loc);
    Result.setIdentifier(nullptr);

    // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
    Result.Val = -Result.Val;

    // -MININT is the only thing that overflows.  Unsigned never overflows.
    bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue();

    // If this operator is live and overflowed, report the issue.
    if (Overflow && ValueLive)
      PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange();

    DT.State = DefinedTracker::Unknown;
    return false;
  }

  case tok::tilde: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);
    Result.setIdentifier(nullptr);

    // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
    Result.Val = ~Result.Val;
    DT.State = DefinedTracker::Unknown;
    return false;
  }

  case tok::exclaim: {
    SourceLocation Start = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);
    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
    Result.setBegin(Start);
    Result.Val = !Result.Val;
    // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
    Result.Val.setIsUnsigned(false);
    Result.setIdentifier(nullptr);

    // '!' flips the defined(X)/!defined(X) tracking state so that forms
    // like !!!defined(X) are still recognized for #ifndef-style guards.
    if (DT.State == DefinedTracker::DefinedMacro)
      DT.State = DefinedTracker::NotDefinedMacro;
    else if (DT.State == DefinedTracker::NotDefinedMacro)
      DT.State = DefinedTracker::DefinedMacro;
    return false;
  }
  case tok::kw_true:
  case tok::kw_false:
    Result.Val = PeekTok.getKind() == tok::kw_true;
    Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
    Result.setIdentifier(PeekTok.getIdentifierInfo());
    Result.setRange(PeekTok.getLocation());
    PP.LexNonComment(PeekTok);
    return false;

  // FIXME: Handle #assert
  }
}

/// getPrecedence - Return the precedence of the specified binary operator
/// token.  This returns:
///   ~0 - Invalid token.
///   14 -> 3 - various operators.
+///    0 - 'eod' or ')' +static unsigned getPrecedence(tok::TokenKind Kind) { +  switch (Kind) { +  default: return ~0U; +  case tok::percent: +  case tok::slash: +  case tok::star:                 return 14; +  case tok::plus: +  case tok::minus:                return 13; +  case tok::lessless: +  case tok::greatergreater:       return 12; +  case tok::lessequal: +  case tok::less: +  case tok::greaterequal: +  case tok::greater:              return 11; +  case tok::exclaimequal: +  case tok::equalequal:           return 10; +  case tok::amp:                  return 9; +  case tok::caret:                return 8; +  case tok::pipe:                 return 7; +  case tok::ampamp:               return 6; +  case tok::pipepipe:             return 5; +  case tok::question:             return 4; +  case tok::comma:                return 3; +  case tok::colon:                return 2; +  case tok::r_paren:              return 0;// Lowest priority, end of expr. +  case tok::eod:                  return 0;// Lowest priority, end of directive. +  } +} + +static void diagnoseUnexpectedOperator(Preprocessor &PP, PPValue &LHS, +                                       Token &Tok) { +  if (Tok.is(tok::l_paren) && LHS.getIdentifier()) +    PP.Diag(LHS.getRange().getBegin(), diag::err_pp_expr_bad_token_lparen) +        << LHS.getIdentifier(); +  else +    PP.Diag(Tok.getLocation(), diag::err_pp_expr_bad_token_binop) +        << LHS.getRange(); +} + +/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is +/// PeekTok, and whose precedence is PeekPrec.  This returns the result in LHS. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used.  As such, avoid diagnostics that relate to +/// evaluation, such as division by zero warnings. 
static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
                                     Token &PeekTok, bool ValueLive,
                                     bool &IncludedUndefinedIds,
                                     Preprocessor &PP) {
  unsigned PeekPrec = getPrecedence(PeekTok.getKind());
  // If this token isn't valid, report the error.
  if (PeekPrec == ~0U) {
    diagnoseUnexpectedOperator(PP, LHS, PeekTok);
    return true;
  }

  // Classic precedence-climbing loop: repeatedly consume an operator and its
  // RHS while the next operator binds at least as tightly as MinPrec.
  while (true) {
    // If this token has a lower precedence than we are allowed to parse, return
    // it so that higher levels of the recursion can parse it.
    if (PeekPrec < MinPrec)
      return false;

    tok::TokenKind Operator = PeekTok.getKind();

    // If this is a short-circuiting operator, see if the RHS of the operator is
    // dead.  Note that this cannot just clobber ValueLive.  Consider
    // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)".  In
    // this example, the RHS of the && being dead does not make the rest of the
    // expr dead.
    bool RHSIsLive;
    if (Operator == tok::ampamp && LHS.Val == 0)
      RHSIsLive = false;   // RHS of "0 && x" is dead.
    else if (Operator == tok::pipepipe && LHS.Val != 0)
      RHSIsLive = false;   // RHS of "1 || x" is dead.
    else if (Operator == tok::question && LHS.Val == 0)
      RHSIsLive = false;   // RHS (x) of "0 ? x : y" is dead.
    else
      RHSIsLive = ValueLive;

    // Consume the operator, remembering the operator's location for reporting.
    SourceLocation OpLoc = PeekTok.getLocation();
    PP.LexNonComment(PeekTok);

    PPValue RHS(LHS.getBitWidth());
    // Parse the RHS of the operator.
    DefinedTracker DT;
    if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
    // NOTE(review): this overwrites rather than ORs in the flag, so it
    // reflects only the most recently parsed operand -- confirm intended.
    IncludedUndefinedIds = DT.IncludedUndefinedIds;

    // Remember the precedence of this operator and get the precedence of the
    // operator immediately to the right of the RHS.
    unsigned ThisPrec = PeekPrec;
    PeekPrec = getPrecedence(PeekTok.getKind());

    // If this token isn't valid, report the error.
    if (PeekPrec == ~0U) {
      diagnoseUnexpectedOperator(PP, RHS, PeekTok);
      return true;
    }

    // Decide whether to include the next binop in this subexpression.  For
    // example, when parsing x+y*z and looking at '*', we want to recursively
    // handle y*z as a single subexpression.  We do this because the precedence
    // of * is higher than that of +.  The only strange case we have to handle
    // here is for the ?: operator, where the precedence is actually lower than
    // the LHS of the '?'.  The grammar rule is:
    //
    // conditional-expression ::=
    //    logical-OR-expression ? expression : conditional-expression
    // where 'expression' is actually comma-expression.
    unsigned RHSPrec;
    if (Operator == tok::question)
      // The RHS of "?" should be maximally consumed as an expression.
      RHSPrec = getPrecedence(tok::comma);
    else  // All others should munch while higher precedence.
      RHSPrec = ThisPrec+1;

    if (PeekPrec >= RHSPrec) {
      if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive,
                                   IncludedUndefinedIds, PP))
        return true;
      PeekPrec = getPrecedence(PeekTok.getKind());
    }
    assert(PeekPrec <= ThisPrec && "Recursion didn't work!");

    // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
    // either operand is unsigned.
    llvm::APSInt Res(LHS.getBitWidth());
    switch (Operator) {
    case tok::question:       // No UAC for x and y in "x ? y : z".
    case tok::lessless:       // Shift amount doesn't UAC with shift value.
    case tok::greatergreater: // Shift amount doesn't UAC with shift value.
    case tok::comma:          // Comma operands are not subject to UACs.
    case tok::pipepipe:       // Logical || does not do UACs.
    case tok::ampamp:         // Logical && does not do UACs.
      break;                  // No UAC
    default:
      Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned());
      // If this just promoted something from signed to unsigned, and if the
      // value was negative, warn about it.
      if (ValueLive && Res.isUnsigned()) {
        if (!LHS.isUnsigned() && LHS.Val.isNegative())
          PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 0
            << LHS.Val.toString(10, true) + " to " +
               LHS.Val.toString(10, false)
            << LHS.getRange() << RHS.getRange();
        if (!RHS.isUnsigned() && RHS.Val.isNegative())
          PP.Diag(OpLoc, diag::warn_pp_convert_to_positive) << 1
            << RHS.Val.toString(10, true) + " to " +
               RHS.Val.toString(10, false)
            << LHS.getRange() << RHS.getRange();
      }
      LHS.Val.setIsUnsigned(Res.isUnsigned());
      RHS.Val.setIsUnsigned(Res.isUnsigned());
    }

    // Evaluate the operator.  Division/remainder by zero in a live context is
    // a hard error; signed arithmetic uses the overflow-checked APSInt ops.
    bool Overflow = false;
    switch (Operator) {
    default: llvm_unreachable("Unknown operator token!");
    case tok::percent:
      if (RHS.Val != 0)
        Res = LHS.Val % RHS.Val;
      else if (ValueLive) {
        PP.Diag(OpLoc, diag::err_pp_remainder_by_zero)
          << LHS.getRange() << RHS.getRange();
        return true;
      }
      break;
    case tok::slash:
      if (RHS.Val != 0) {
        if (LHS.Val.isSigned())
          Res = llvm::APSInt(LHS.Val.sdiv_ov(RHS.Val, Overflow), false);
        else
          Res = LHS.Val / RHS.Val;
      } else if (ValueLive) {
        PP.Diag(OpLoc, diag::err_pp_division_by_zero)
          << LHS.getRange() << RHS.getRange();
        return true;
      }
      break;

    case tok::star:
      if (Res.isSigned())
        Res = llvm::APSInt(LHS.Val.smul_ov(RHS.Val, Overflow), false);
      else
        Res = LHS.Val * RHS.Val;
      break;
    case tok::lessless: {
      // Determine whether overflow is about to happen.
      if (LHS.isUnsigned())
        Res = LHS.Val.ushl_ov(RHS.Val, Overflow);
      else
        Res = llvm::APSInt(LHS.Val.sshl_ov(RHS.Val, Overflow), false);
      break;
    }
    case tok::greatergreater: {
      // Determine whether overflow is about to happen.
      unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
      // Clamp an over-wide shift amount to keep the APSInt shift defined.
      if (ShAmt >= LHS.getBitWidth()) {
        Overflow = true;
        ShAmt = LHS.getBitWidth()-1;
      }
      Res = LHS.Val >> ShAmt;
      break;
    }
    case tok::plus:
      if (LHS.isUnsigned())
        Res = LHS.Val + RHS.Val;
      else
        Res = llvm::APSInt(LHS.Val.sadd_ov(RHS.Val, Overflow), false);
      break;
    case tok::minus:
      if (LHS.isUnsigned())
        Res = LHS.Val - RHS.Val;
      else
        Res = llvm::APSInt(LHS.Val.ssub_ov(RHS.Val, Overflow), false);
      break;
    case tok::lessequal:
      Res = LHS.Val <= RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
      break;
    case tok::less:
      Res = LHS.Val < RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
      break;
    case tok::greaterequal:
      Res = LHS.Val >= RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
      break;
    case tok::greater:
      Res = LHS.Val > RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
      break;
    case tok::exclaimequal:
      Res = LHS.Val != RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
      break;
    case tok::equalequal:
      Res = LHS.Val == RHS.Val;
      Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
      break;
    case tok::amp:
      Res = LHS.Val & RHS.Val;
      break;
    case tok::caret:
      Res = LHS.Val ^ RHS.Val;
      break;
    case tok::pipe:
      Res = LHS.Val | RHS.Val;
      break;
    case tok::ampamp:
      Res = (LHS.Val != 0 && RHS.Val != 0);
      Res.setIsUnsigned(false);  // C99 6.5.13p3, result is always int (signed)
      break;
    case tok::pipepipe:
      Res = (LHS.Val != 0 || RHS.Val != 0);
      Res.setIsUnsigned(false);  // C99 6.5.14p3, result is always int (signed)
      break;
    case tok::comma:
      // Comma is invalid in pp expressions in c89/c++ mode, but is valid in C99
      // if not being evaluated.
      if (!PP.getLangOpts().C99 || ValueLive)
        PP.Diag(OpLoc, diag::ext_pp_comma_expr)
          << LHS.getRange() << RHS.getRange();
      Res = RHS.Val; // LHS = LHS,RHS -> RHS.
      break;
    case tok::question: {
      // Parse the : part of the expression.
      if (PeekTok.isNot(tok::colon)) {
        PP.Diag(PeekTok.getLocation(), diag::err_expected)
            << tok::colon << LHS.getRange() << RHS.getRange();
        PP.Diag(OpLoc, diag::note_matching) << tok::question;
        return true;
      }
      // Consume the :.
      PP.LexNonComment(PeekTok);

      // Evaluate the value after the :.
      bool AfterColonLive = ValueLive && LHS.Val == 0;
      PPValue AfterColonVal(LHS.getBitWidth());
      DefinedTracker DT;
      if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP))
        return true;

      // Parse anything after the : with the same precedence as ?.  We allow
      // things of equal precedence because ?: is right associative.
      if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec,
                                   PeekTok, AfterColonLive,
                                   IncludedUndefinedIds, PP))
        return true;

      // Now that we have the condition, the LHS and the RHS of the :, evaluate.
      Res = LHS.Val != 0 ? RHS.Val : AfterColonVal.Val;
      RHS.setEnd(AfterColonVal.getRange().getEnd());

      // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
      // either operand is unsigned.
      Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned());

      // Figure out the precedence of the token after the : part.
      PeekPrec = getPrecedence(PeekTok.getKind());
      break;
    }
    case tok::colon:
      // Don't allow :'s to float around without being part of ?: exprs.
      PP.Diag(OpLoc, diag::err_pp_colon_without_question)
        << LHS.getRange() << RHS.getRange();
      return true;
    }

    // If this operator is live and overflowed, report the issue.
    if (Overflow && ValueLive)
      PP.Diag(OpLoc, diag::warn_pp_expr_overflow)
        << LHS.getRange() << RHS.getRange();

    // Put the result back into 'LHS' for our next iteration.
    LHS.Val = Res;
    LHS.setEnd(RHS.getRange().getEnd());
    RHS.setIdentifier(nullptr);
  }
}

/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
/// may occur after a #if or #elif directive.  If the expression is equivalent
/// to "!defined(X)" return X in IfNDefMacro.
Preprocessor::DirectiveEvalResult
Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
  SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
  // Save the current state of 'DisableMacroExpansion' and reset it to false. If
  // 'DisableMacroExpansion' is true, then we must be in a macro argument list
  // in which case a directive is undefined behavior.  We want macros to be able
  // to recursively expand in order to get more gcc-list behavior, so we force
  // DisableMacroExpansion to false and restore it when we're done parsing the
  // expression.
  bool DisableMacroExpansionAtStartOfDirective = DisableMacroExpansion;
  DisableMacroExpansion = false;

  // Peek ahead one token.
  Token Tok;
  LexNonComment(Tok);

  // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
  unsigned BitWidth = getTargetInfo().getIntMaxTWidth();

  PPValue ResVal(BitWidth);
  DefinedTracker DT;
  SourceLocation ExprStartLoc = SourceMgr.getExpansionLoc(Tok.getLocation());
  if (EvaluateValue(ResVal, Tok, DT, true, *this)) {
    // Parse error, skip the rest of the macro line.
    SourceRange ConditionRange = ExprStartLoc;
    if (Tok.isNot(tok::eod))
      ConditionRange = DiscardUntilEndOfDirective();

    // Restore 'DisableMacroExpansion'.
    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;

    // We cannot trust the source range from the value because there was a
    // parse error. Track the range manually -- the end of the directive is the
    // end of the condition range.
    return {false,
            DT.IncludedUndefinedIds,
            {ExprStartLoc, ConditionRange.getEnd()}};
  }

  // If we are at the end of the expression after just parsing a value, there
  // must be no (unparenthesized) binary operators involved, so we can exit
  // directly.
  if (Tok.is(tok::eod)) {
    // If the expression we parsed was of the form !defined(macro), return the
    // macro in IfNDefMacro.
    if (DT.State == DefinedTracker::NotDefinedMacro)
      IfNDefMacro = DT.TheMacro;

    // Restore 'DisableMacroExpansion'.
    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
    return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
  }

  // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
  // operator and the stuff after it.
  if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question),
                               Tok, true, DT.IncludedUndefinedIds, *this)) {
    // Parse error, skip the rest of the macro line.
    if (Tok.isNot(tok::eod))
      DiscardUntilEndOfDirective();

    // Restore 'DisableMacroExpansion'.
    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
    return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
  }

  // If we aren't at the tok::eod token, something bad happened, like an extra
  // ')' token.
  if (Tok.isNot(tok::eod)) {
    Diag(Tok, diag::err_pp_expected_eol);
    DiscardUntilEndOfDirective();
  }

  // Restore 'DisableMacroExpansion'.
  DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
  return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
}
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
new file mode 100644
index 000000000000..802172693960
--- /dev/null
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -0,0 +1,834 @@
//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements pieces of the Preprocessor interface that manage the
// current lexer stack.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
using namespace clang;

// Out-of-line virtual destructor anchors PPCallbacks' vtable to this file.
PPCallbacks::~PPCallbacks() {}

//===----------------------------------------------------------------------===//
// Miscellaneous Methods.
//===----------------------------------------------------------------------===//

/// isInPrimaryFile - Return true if we're in the top-level file, not in a
/// \#include.  This looks through macro expansions and active _Pragma lexers.
bool Preprocessor::isInPrimaryFile() const {
  if (IsFileLexer())
    return IncludeMacroStack.empty();

  // If there are any stacked lexers, we're in a #include.
  assert(IsFileLexer(IncludeMacroStack[0]) &&
         "Top level include stack isn't our primary lexer?");
  // We are in the primary file only if no stacked entry (other than the
  // bottom-most one asserted above) is a file lexer.
  return std::none_of(
      IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
      [&](const IncludeStackInfo &ISI) -> bool { return IsFileLexer(ISI); });
}

/// getCurrentLexer - Return the current file lexer being lexed from.  Note
/// that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.  Returns null if no file lexer is on the
/// stack at all (e.g. only macro/token-stream lexers are active).
PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
  if (IsFileLexer())
    return CurPPLexer;

  // Look for a stacked lexer, scanning from innermost to outermost.
  for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
    if (IsFileLexer(ISI))
      return ISI.ThePPLexer;
  }
  return nullptr;
}


//===----------------------------------------------------------------------===//
// Methods for Entering and Callbacks for leaving various contexts
//===----------------------------------------------------------------------===//

/// EnterSourceFile - Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
///
/// \returns true on failure (the memory buffer for \p FID could not be
/// loaded, in which case a diagnostic has already been emitted).
bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
                                   SourceLocation Loc) {
  assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
  ++NumEnteredSourceFiles;

  // Track the deepest include nesting seen, for statistics.
  if (MaxIncludeStackDepth < IncludeMacroStack.size())
    MaxIncludeStackDepth = IncludeMacroStack.size();

  // Get the MemoryBuffer for this FID, if it fails, we fail.
  bool Invalid = false;
  const llvm::MemoryBuffer *InputFile =
    getSourceManager().getBuffer(FID, Loc, &Invalid);
  if (Invalid) {
    SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
    Diag(Loc, diag::err_pp_error_opening_file)
      << std::string(SourceMgr.getBufferName(FileStart)) << "";
    return true;
  }

  // If code completion is enabled and this is the file containing the
  // completion point, compute the completion point's absolute location now.
  if (isCodeCompletionEnabled() &&
      SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
    CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
    CodeCompletionLoc =
        CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
  }

  EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
  return false;
}

/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
///  and start lexing tokens from it instead of the current buffer.
///
/// Takes ownership of \p TheLexer (it is installed into CurLexer, a
/// unique_ptr).
void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
                                            const DirectoryLookup *CurDir) {

  // Add the current lexer to the include stack.
  if (CurPPLexer || CurTokenLexer)
    PushIncludeMacroStack();

  CurLexer.reset(TheLexer);
  CurPPLexer = TheLexer;
  CurDirLookup = CurDir;
  CurLexerSubmodule = nullptr;
  // Don't clobber a pending module-import lexing mode.
  if (CurLexerKind != CLK_LexAfterModuleImport)
    CurLexerKind = CLK_Lexer;

  // Notify the client, if desired, that we are in a new source file.
  // _Pragma lexers are synthetic and do not constitute a file change.
  if (Callbacks && !CurLexer->Is_PragmaLexer) {
    SrcMgr::CharacteristicKind FileType =
       SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());

    Callbacks->FileChanged(CurLexer->getFileLoc(),
                           PPCallbacks::EnterFile, FileType);
  }
}

/// EnterMacro - Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
                              MacroInfo *Macro, MacroArgs *Args) {
  // Reuse a cached TokenLexer when one is available to avoid an allocation.
  std::unique_ptr<TokenLexer> TokLexer;
  if (NumCachedTokenLexers == 0) {
    TokLexer = std::make_unique<TokenLexer>(Tok, ILEnd, Macro, Args, *this);
  } else {
    TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
    TokLexer->Init(Tok, ILEnd, Macro, Args);
  }

  // Save the current lexing state and make the macro expander current.
  PushIncludeMacroStack();
  CurDirLookup = nullptr;
  CurTokenLexer = std::move(TokLexer);
  if (CurLexerKind != CLK_LexAfterModuleImport)
    CurLexerKind = CLK_TokenLexer;
}

/// EnterTokenStream - Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If DisableMacroExpansion is true, tokens lexed from the token stream will
/// not be subject to further macro expansion.  Otherwise, these tokens will
/// be re-macro-expanded when/if expansion is enabled.
///
/// If OwnsTokens is false, this method assumes that the specified stream of
/// tokens has a permanent owner somewhere, so they do not need to be copied.
/// If it is true, it assumes the array of tokens is allocated with new[] and
/// must be freed.
///
void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
                                    bool DisableMacroExpansion, bool OwnsTokens,
                                    bool IsReinject) {
  if (CurLexerKind == CLK_CachingLexer) {
    if (CachedLexPos < CachedTokens.size()) {
      assert(IsReinject && "new tokens in the middle of cached stream");
      // We're entering tokens into the middle of our cached token stream. We
      // can't represent that, so just insert the tokens into the buffer.
      CachedTokens.insert(CachedTokens.begin() + CachedLexPos,
                          Toks, Toks + NumToks);
      // The tokens were copied into CachedTokens, so free them if we own them.
      if (OwnsTokens)
        delete [] Toks;
      return;
    }

    // New tokens are at the end of the cached token sequence; insert the
    // token stream underneath the caching lexer.
    ExitCachingLexMode();
    EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
                     IsReinject);
    EnterCachingLexMode();
    return;
  }

  // Create a macro expander to expand from the specified token stream,
  // reusing a cached TokenLexer if one is available.
  std::unique_ptr<TokenLexer> TokLexer;
  if (NumCachedTokenLexers == 0) {
    TokLexer = std::make_unique<TokenLexer>(
        Toks, NumToks, DisableMacroExpansion, OwnsTokens, IsReinject, *this);
  } else {
    TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
    TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
                   IsReinject);
  }

  // Save our current state.
  PushIncludeMacroStack();
  CurDirLookup = nullptr;
  CurTokenLexer = std::move(TokLexer);
  if (CurLexerKind != CLK_LexAfterModuleImport)
    CurLexerKind = CLK_TokenLexer;
}

/// Compute the relative path that names the given file relative to
/// the given directory.
+static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, +                                const FileEntry *File, +                                SmallString<128> &Result) { +  Result.clear(); + +  StringRef FilePath = File->getDir()->getName(); +  StringRef Path = FilePath; +  while (!Path.empty()) { +    if (auto CurDir = FM.getDirectory(Path)) { +      if (*CurDir == Dir) { +        Result = FilePath.substr(Path.size()); +        llvm::sys::path::append(Result, +                                llvm::sys::path::filename(File->getName())); +        return; +      } +    } + +    Path = llvm::sys::path::parent_path(Path); +  } + +  Result = File->getName(); +} + +void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) { +  if (CurTokenLexer) { +    CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result); +    return; +  } +  if (CurLexer) { +    CurLexer->PropagateLineStartLeadingSpaceInfo(Result); +    return; +  } +  // FIXME: Handle other kinds of lexers?  It generally shouldn't matter, +  // but it might if they're empty? +} + +/// Determine the location to use as the end of the buffer for a lexer. +/// +/// If the file ends with a newline, form the EOF token on the newline itself, +/// rather than "on the line following it", which doesn't exist.  This makes +/// diagnostics relating to the end of file include the last file that the user +/// actually typed, which is goodness. 
const char *Preprocessor::getCurLexerEndPos() {
  const char *EndPos = CurLexer->BufferEnd;
  // Back up over a trailing newline so the EOF token lands on the last real
  // line.
  if (EndPos != CurLexer->BufferStart &&
      (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
    --EndPos;

    // Handle \n\r and \r\n:
    if (EndPos != CurLexer->BufferStart &&
        (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
        EndPos[-1] != EndPos[0])
      --EndPos;
  }

  return EndPos;
}

/// Collect \p Mod and, recursively, every submodule of it that has an
/// umbrella header, into \p SubMods.
static void collectAllSubModulesWithUmbrellaHeader(
    const Module &Mod, SmallVectorImpl<const Module *> &SubMods) {
  if (Mod.getUmbrellaHeader())
    SubMods.push_back(&Mod);
  for (auto *M : Mod.submodules())
    collectAllSubModulesWithUmbrellaHeader(*M, SubMods);
}

/// Warn about any header-like file in \p Mod's umbrella directory that was
/// never lexed (i.e. not covered by the umbrella header).
void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
  assert(Mod.getUmbrellaHeader() && "Module must use umbrella header");
  SourceLocation StartLoc =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  // Skip the directory walk entirely if the warning is disabled.
  if (getDiagnostics().isIgnored(diag::warn_uncovered_module_header, StartLoc))
    return;

  ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
  const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry;
  llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
  std::error_code EC;
  for (llvm::vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC),
       End;
       Entry != End && !EC; Entry.increment(EC)) {
    using llvm::StringSwitch;

    // Check whether this entry has an extension typically associated with
    // headers.
    if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path()))
             .Cases(".h", ".H", ".hh", ".hpp", true)
             .Default(false))
      continue;

    // A header that the SourceManager has no info for was never read during
    // this compilation, i.e. the umbrella header did not reach it.
    if (auto Header = getFileManager().getFile(Entry->path()))
      if (!getSourceManager().hasFileInfo(*Header)) {
        if (!ModMap.isHeaderInUnavailableModule(*Header)) {
          // Find the relative path that would access this header.
          SmallString<128> RelativePath;
          computeRelativePath(FileMgr, Dir, *Header, RelativePath);
          Diag(StartLoc, diag::warn_uncovered_module_header)
              << Mod.getFullModuleName() << RelativePath;
        }
      }
  }
}

/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
/// the current file.  This either returns the EOF token or pops a level off
/// the include stack and keeps going.
///
/// \returns true when \p Result holds a token the caller should consume (an
/// eof or annot_module_end token); false when the caller should lex again
/// from the newly-exposed lexer.
bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
  assert(!CurTokenLexer &&
         "Ending a file when currently in a macro!");

  // If we have an unclosed module region from a pragma at the end of a
  // module, complain and close it now.
  const bool LeavingSubmodule = CurLexer && CurLexerSubmodule;
  if ((LeavingSubmodule || IncludeMacroStack.empty()) &&
      !BuildingSubmoduleStack.empty() &&
      BuildingSubmoduleStack.back().IsPragma) {
    Diag(BuildingSubmoduleStack.back().ImportLoc,
         diag::err_pp_module_begin_without_module_end);
    Module *M = LeaveSubmodule(/*ForPragma*/true);

    // Synthesize the module-end annotation token the parser expects.
    Result.startToken();
    const char *EndPos = getCurLexerEndPos();
    CurLexer->BufferPtr = EndPos;
    CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
    Result.setAnnotationEndLoc(Result.getLocation());
    Result.setAnnotationValue(M);
    return true;
  }

  // See if this file had a controlling macro.
  if (CurPPLexer) {  // Not ending a macro, ignore it.
    if (const IdentifierInfo *ControllingMacro =
          CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
      // Okay, this has a controlling macro, remember in HeaderFileInfo.
      if (const FileEntry *FE = CurPPLexer->getFileEntry()) {
        HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
        if (MacroInfo *MI =
              getMacroInfo(const_cast<IdentifierInfo*>(ControllingMacro)))
          MI->setUsedForHeaderGuard(true);
        // If the #define'd macro doesn't match the guard the #ifndef tested,
        // this may be a misspelled header guard; diagnose it.
        if (const IdentifierInfo *DefinedMacro =
              CurPPLexer->MIOpt.GetDefinedMacro()) {
          if (!isMacroDefined(ControllingMacro) &&
              DefinedMacro != ControllingMacro &&
              HeaderInfo.FirstTimeLexingFile(FE)) {

            // If the edit distance between the two macros is more than 50%,
            // DefinedMacro may not be a header guard, or may be the header
            // guard of another header file. Therefore, it may be defining
            // something completely different. This can be observed in the
            // wild when handling feature macros or header guards in
            // different files.

            const StringRef ControllingMacroName = ControllingMacro->getName();
            const StringRef DefinedMacroName = DefinedMacro->getName();
            const size_t MaxHalfLength = std::max(ControllingMacroName.size(),
                                                  DefinedMacroName.size()) / 2;
            const unsigned ED = ControllingMacroName.edit_distance(
                DefinedMacroName, true, MaxHalfLength);
            if (ED <= MaxHalfLength) {
              // Emit a warning for a bad header guard.
              Diag(CurPPLexer->MIOpt.GetMacroLocation(),
                   diag::warn_header_guard)
                  << CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro;
              Diag(CurPPLexer->MIOpt.GetDefinedLocation(),
                   diag::note_header_guard)
                  << CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro
                  << ControllingMacro
                  << FixItHint::CreateReplacement(
                         CurPPLexer->MIOpt.GetDefinedLocation(),
                         ControllingMacro->getName());
            }
          }
        }
      }
    }
  }

  // Complain about reaching a true EOF within arc_cf_code_audited.
  // We don't want to complain about reaching the end of a macro
  // instantiation or a _Pragma.
  if (PragmaARCCFCodeAuditedInfo.second.isValid() && !isEndOfMacro &&
      !(CurLexer && CurLexer->Is_PragmaLexer)) {
    Diag(PragmaARCCFCodeAuditedInfo.second,
         diag::err_pp_eof_in_arc_cf_code_audited);

    // Recover by leaving immediately.
    PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
  }

  // Complain about reaching a true EOF within assume_nonnull.
  // We don't want to complain about reaching the end of a macro
  // instantiation or a _Pragma.
  if (PragmaAssumeNonNullLoc.isValid() &&
      !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
    Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);

    // Recover by leaving immediately.
    PragmaAssumeNonNullLoc = SourceLocation();
  }

  bool LeavingPCHThroughHeader = false;

  // If this is a #include'd file, pop it off the include stack and continue
  // lexing the #includer file.
  if (!IncludeMacroStack.empty()) {

    // If we lexed the code-completion file, act as if we reached EOF.
    if (isCodeCompletionEnabled() && CurPPLexer &&
        SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
            CodeCompletionFileLoc) {
      assert(CurLexer && "Got EOF but no current lexer set!");
      Result.startToken();
      CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
      CurLexer.reset();

      CurPPLexer = nullptr;
      recomputeCurLexerKind();
      return true;
    }

    if (!isEndOfMacro && CurPPLexer &&
        SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
      // Notify SourceManager to record the number of FileIDs that were created
      // during lexing of the #include'd file.
      unsigned NumFIDs =
          SourceMgr.local_sloc_entry_size() -
          CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
      SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
    }

    // Remember which FileID we are leaving, and whether it was the predefines
    // buffer, before RemoveTopOfLexerStack() invalidates CurPPLexer.
    bool ExitedFromPredefinesFile = false;
    FileID ExitedFID;
    if (!isEndOfMacro && CurPPLexer) {
      ExitedFID = CurPPLexer->getFileID();

      assert(PredefinesFileID.isValid() &&
             "HandleEndOfFile is called before PredefinesFileId is set");
      ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
    }

    if (LeavingSubmodule) {
      // We're done with this submodule.
      Module *M = LeaveSubmodule(/*ForPragma*/false);

      // Notify the parser that we've left the module.
      const char *EndPos = getCurLexerEndPos();
      Result.startToken();
      CurLexer->BufferPtr = EndPos;
      CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
      Result.setAnnotationEndLoc(Result.getLocation());
      Result.setAnnotationValue(M);
    }

    bool FoundPCHThroughHeader = false;
    if (CurPPLexer && creatingPCHWithThroughHeader() &&
        isPCHThroughHeader(
            SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
      FoundPCHThroughHeader = true;

    // We're done with the #included file.
    RemoveTopOfLexerStack();

    // Propagate info about start-of-line/leading white-space/etc.
    PropagateLineStartLeadingSpaceInfo(Result);

    // Notify the client, if desired, that we are in a new source file.
    if (Callbacks && !isEndOfMacro && CurPPLexer) {
      SrcMgr::CharacteristicKind FileType =
        SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
      Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
                             PPCallbacks::ExitFile, FileType, ExitedFID);
    }

    // Restore conditional stack from the preamble right after exiting from the
    // predefines file.
    if (ExitedFromPredefinesFile)
      replayPreambleConditionalStack();

    if (!isEndOfMacro && CurPPLexer && FoundPCHThroughHeader &&
        (isInPrimaryFile() ||
         CurPPLexer->getFileID() == getPredefinesFileID())) {
      // Leaving the through header. Continue directly to end of main file
      // processing.
      LeavingPCHThroughHeader = true;
    } else {
      // Client should lex another token unless we generated an EOM.
      return LeavingSubmodule;
    }
  }

  // If this is the end of the main file, form an EOF token.
  assert(CurLexer && "Got EOF but no current lexer set!");
  const char *EndPos = getCurLexerEndPos();
  Result.startToken();
  CurLexer->BufferPtr = EndPos;
  CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);

  if (isCodeCompletionEnabled()) {
    // Inserting the code-completion point increases the source buffer by 1,
    // but the main FileID was created before inserting the point.
    // Compensate by reducing the EOF location by 1, otherwise the location
    // will point to the next FileID.
    // FIXME: This is hacky, the code-completion point should probably be
    // inserted before the main FileID is created.
    if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
      Result.setLocation(Result.getLocation().getLocWithOffset(-1));
  }

  if (creatingPCHWithThroughHeader() && !LeavingPCHThroughHeader) {
    // Reached the end of the compilation without finding the through header.
    Diag(CurLexer->getFileLoc(), diag::err_pp_through_header_not_seen)
        << PPOpts->PCHThroughHeader << 0;
  }

  if (!isIncrementalProcessingEnabled())
    // We're done with lexing.
    CurLexer.reset();

  if (!isIncrementalProcessingEnabled())
    CurPPLexer = nullptr;

  if (TUKind == TU_Complete) {
    // This is the end of the top-level file. 'WarnUnusedMacroLocs' has
    // collected all macro locations that we need to warn because they are not
    // used.
    for (WarnUnusedMacroLocsTy::iterator
           I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end();
           I!=E; ++I)
      Diag(*I, diag::pp_macro_not_used);
  }

  // If we are building a module that has an umbrella header, make sure that
  // each of the headers within the directory (including those of all
  // submodules) that is covered by the umbrella header was actually included
  // by the umbrella header.
  if (Module *Mod = getCurrentModule()) {
    llvm::SmallVector<const Module *, 4> AllMods;
    collectAllSubModulesWithUmbrellaHeader(*Mod, AllMods);
    for (auto *M : AllMods)
      diagnoseMissingHeaderInUmbrellaDir(*M);
  }

  return true;
}

/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
/// hits the end of its token stream.
bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
  assert(CurTokenLexer && !CurPPLexer &&
         "Ending a macro when currently in a #include file!");

  if (!MacroExpandingLexersStack.empty() &&
      MacroExpandingLexersStack.back().first == CurTokenLexer.get())
    removeCachedMacroExpandedTokensOfLastLexer();

  // Delete or cache the now-dead macro expander.
  if (NumCachedTokenLexers == TokenLexerCacheSize)
    CurTokenLexer.reset();
  else
    TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);

  // Handle this like a #include file being popped off the stack.
  return HandleEndOfFile(Result, true);
}

/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
/// lexer stack.  This should only be used in situations where the current
/// state of the top-of-stack lexer is unknown.
void Preprocessor::RemoveTopOfLexerStack() {
  assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");

  if (CurTokenLexer) {
    // Delete or cache the now-dead macro expander.
    if (NumCachedTokenLexers == TokenLexerCacheSize)
      CurTokenLexer.reset();
    else
      TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
  }

  PopIncludeMacroStack();
}

/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
/// comment (/##/) in microsoft mode, this method handles updating the current
/// state, returning the token on the next source line.
void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
  assert(CurTokenLexer && !CurPPLexer &&
         "Pasted comment can only be formed from macro");
  // We handle this by scanning for the closest real lexer, switching it to
  // raw mode and preprocessor mode.  This will cause it to return \n as an
  // explicit EOD token.
  PreprocessorLexer *FoundLexer = nullptr;
  bool LexerWasInPPMode = false;
  for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
    if (ISI.ThePPLexer == nullptr) continue;  // Scan for a real lexer.

    // Once we find a real lexer, mark it as raw mode (disabling macro
    // expansions) and preprocessor mode (return EOD).  We know that the lexer
    // was *not* in raw mode before, because the macro that the comment came
    // from was expanded.  However, it could have already been in preprocessor
    // mode (#if COMMENT) in which case we have to return it to that mode and
    // return EOD.
    FoundLexer = ISI.ThePPLexer;
    FoundLexer->LexingRawMode = true;
    LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
    FoundLexer->ParsingPreprocessorDirective = true;
    break;
  }

  // Okay, we either found and switched over the lexer, or we didn't find a
  // lexer.  In either case, finish off the macro the comment came from, getting
  // the next token.
  if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);

  // Discarding comments as long as we don't have EOF or EOD.  This 'comments
  // out' the rest of the line, including any tokens that came from other macros
  // that were active, as in:
  //  #define submacro a COMMENT b
  //    submacro c
  // which should lex to 'a' only: 'b' and 'c' should be removed.
  while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
    Lex(Tok);

  // If we got an eod token, then we successfully found the end of the line.
  if (Tok.is(tok::eod)) {
    assert(FoundLexer && "Can't get end of line without an active lexer");
    // Restore the lexer back to normal mode instead of raw mode.
    FoundLexer->LexingRawMode = false;

    // If the lexer was already in preprocessor mode, just return the EOD token
    // to finish the preprocessor line.
    if (LexerWasInPPMode) return;

    // Otherwise, switch out of PP mode and return the next lexed token.
    FoundLexer->ParsingPreprocessorDirective = false;
    return Lex(Tok);
  }

  // If we got an EOF token, then we reached the end of the token stream but
  // didn't find an explicit \n.  This can only happen if there was no lexer
  // active (an active lexer would return EOD at EOF if there was no \n in
  // preprocessor directive mode), so just return EOF as our token.
  assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
}

/// Enter the given submodule: push it onto BuildingSubmoduleStack and, when
/// tracking local macro visibility, switch CurSubmoduleState to the
/// submodule's own macro state.
void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc,
                                  bool ForPragma) {
  if (!getLangOpts().ModulesLocalVisibility) {
    // Just track that we entered this submodule.
    BuildingSubmoduleStack.push_back(
        BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
                              PendingModuleMacroNames.size()));
    if (Callbacks)
      Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);
    return;
  }

  // Resolve as much of the module definition as we can now, before we enter
  // one of its headers.
  // FIXME: Can we enable Complain here?
  // FIXME: Can we do this when local visibility is disabled?
  ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
  ModMap.resolveExports(M, /*Complain=*/false);
  ModMap.resolveUses(M, /*Complain=*/false);
  ModMap.resolveConflicts(M, /*Complain=*/false);

  // If this is the first time we've entered this module, set up its state.
  auto R = Submodules.insert(std::make_pair(M, SubmoduleState()));
  auto &State = R.first->second;
  bool FirstTime = R.second;
  if (FirstTime) {
    // Determine the set of starting macros for this submodule; take these
    // from the "null" module (the predefines buffer).
    //
    // FIXME: If we have local visibility but not modules enabled, the
    // NullSubmoduleState is polluted by #defines in the top-level source
    // file.
    auto &StartingMacros = NullSubmoduleState.Macros;

    // Restore to the starting state.
    // FIXME: Do this lazily, when each macro name is first referenced.
    for (auto &Macro : StartingMacros) {
      // Skip uninteresting macros.
      if (!Macro.second.getLatest() &&
          Macro.second.getOverriddenMacros().empty())
        continue;

      MacroState MS(Macro.second.getLatest());
      MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
      State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
    }
  }

  // Track that we entered this module.
  BuildingSubmoduleStack.push_back(
      BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
                            PendingModuleMacroNames.size()));

  if (Callbacks)
    Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);

  // Switch to this submodule as the current submodule.
  CurSubmoduleState = &State;

  // This module is visible to itself.
  if (FirstTime)
    makeModuleVisible(M, ImportLoc);
}

bool Preprocessor::needModuleMacros() const {
  // If we're not within a submodule, we never need to create ModuleMacros.
  if (BuildingSubmoduleStack.empty())
    return false;
  // If we are tracking module macro visibility even for textually-included
  // headers, we need ModuleMacros.
  if (getLangOpts().ModulesLocalVisibility)
    return true;
  // Otherwise, we only need module macros if we're actually compiling a module
  // interface.
  return getLangOpts().isCompilingModule();
}

/// Leave the submodule on top of BuildingSubmoduleStack, building ModuleMacros
/// for macros it defined (when needed) and making the module visible.
///
/// \returns the module that was left, or null on a pragma enter/leave
/// mismatch.
Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
  if (BuildingSubmoduleStack.empty() ||
      BuildingSubmoduleStack.back().IsPragma != ForPragma) {
    assert(ForPragma && "non-pragma module enter/leave mismatch");
    return nullptr;
  }

  auto &Info = BuildingSubmoduleStack.back();

  Module *LeavingMod = Info.M;
  SourceLocation ImportLoc = Info.ImportLoc;

  if (!needModuleMacros() ||
      (!getLangOpts().ModulesLocalVisibility &&
       LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
    // If we don't need module macros, or this is not a module for which we
    // are tracking macro visibility, don't build any, and preserve the list
    // of pending names for the surrounding submodule.
    BuildingSubmoduleStack.pop_back();

    if (Callbacks)
      Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);

    makeModuleVisible(LeavingMod, ImportLoc);
    return LeavingMod;
  }

  // Create ModuleMacros for any macros defined in this submodule.
  llvm::SmallPtrSet<const IdentifierInfo*, 8> VisitedMacros;
  for (unsigned I = Info.OuterPendingModuleMacroNames;
       I != PendingModuleMacroNames.size(); ++I) {
    auto *II = const_cast<IdentifierInfo*>(PendingModuleMacroNames[I]);
    if (!VisitedMacros.insert(II).second)
      continue;

    auto MacroIt = CurSubmoduleState->Macros.find(II);
    if (MacroIt == CurSubmoduleState->Macros.end())
      continue;
    auto &Macro = MacroIt->second;

    // Find the starting point for the MacroDirective chain in this submodule.
    MacroDirective *OldMD = nullptr;
    auto *OldState = Info.OuterSubmoduleState;
    if (getLangOpts().ModulesLocalVisibility)
      OldState = &NullSubmoduleState;
    if (OldState && OldState != CurSubmoduleState) {
      // FIXME: It'd be better to start at the state from when we most recently
      // entered this submodule, but it doesn't really matter.
      auto &OldMacros = OldState->Macros;
      auto OldMacroIt = OldMacros.find(II);
      if (OldMacroIt == OldMacros.end())
        OldMD = nullptr;
      else
        OldMD = OldMacroIt->second.getLatest();
    }

    // This module may have exported a new macro. If so, create a ModuleMacro
    // representing that fact.
    bool ExplicitlyPublic = false;
    for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
      assert(MD && "broken macro directive chain");

      if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
        // The latest visibility directive for a name in a submodule affects
        // all the directives that come before it.
        if (VisMD->isPublic())
          ExplicitlyPublic = true;
        else if (!ExplicitlyPublic)
          // Private with no following public directive: not exported.
          break;
      } else {
        MacroInfo *Def = nullptr;
        if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
          Def = DefMD->getInfo();

        // FIXME: Issue a warning if multiple headers for the same submodule
        // define a macro, rather than silently ignoring all but the first.
        bool IsNew;  // NOTE(review): out-parameter of addModuleMacro;
                     // its value is not consulted here.
        // Don't bother creating a module macro if it would represent a #undef
        // that doesn't override anything.
        if (Def || !Macro.getOverriddenMacros().empty())
          addModuleMacro(LeavingMod, II, Def,
                         Macro.getOverriddenMacros(), IsNew);

        if (!getLangOpts().ModulesLocalVisibility) {
          // This macro is exposed to the rest of this compilation as a
          // ModuleMacro; we don't need to track its MacroDirective any more.
          Macro.setLatest(nullptr);
          Macro.setOverriddenMacros(*this, {});
        }
        break;
      }
    }
  }
  PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);

  // FIXME: Before we leave this submodule, we should parse all the other
  // headers within it. Otherwise, we're left with an inconsistent state
  // where we've made the module visible but don't yet have its complete
  // contents.

  // Put back the outer module's state, if we're tracking it.
  if (getLangOpts().ModulesLocalVisibility)
    CurSubmoduleState = Info.OuterSubmoduleState;

  BuildingSubmoduleStack.pop_back();

  if (Callbacks)
    Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);

  // A nested #include makes the included submodule visible.
  makeModuleVisible(LeavingMod, ImportLoc);
  return LeavingMod;
}
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
new file mode 100644
index 000000000000..dfbcaedcacff
--- /dev/null
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -0,0 +1,1845 @@
//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the top level handling of macro expansion for the
// preprocessor.
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/Attributes.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/DirectoryLookup.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorLexer.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <ctime>
#include <string>
#include <tuple>
#include <utility>

using namespace clang;

/// Return the latest local MacroDirective recorded for \p II in the current
/// submodule state, or null if the identifier was never the subject of a
/// macro directive in this translation unit.
MacroDirective *
Preprocessor::getLocalMacroDirectiveHistory(const IdentifierInfo *II) const {
  // Fast path: identifiers that never had a macro definition carry a flag, so
  // we can avoid the map lookup entirely.
  if (!II->hadMacroDefinition())
    return nullptr;
  auto Pos = CurSubmoduleState->Macros.find(II);
  return Pos == CurSubmoduleState->Macros.end() ? nullptr
                                                : Pos->second.getLatest();
}

/// Chain a freshly created directive (#define/#undef/visibility) onto the
/// front of \p II's directive history in the current submodule state, and
/// update the identifier's cached "has macro" bit accordingly.
void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
  assert(MD && "MacroDirective should be non-zero!");
  assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");

  MacroState &StoredMD = CurSubmoduleState->Macros[II];
  auto *OldMD = StoredMD.getLatest();
  MD->setPrevious(OldMD);
  StoredMD.setLatest(MD);
  // A new local directive shadows any module macros that were active for
  // this name.
  StoredMD.overrideActiveModuleMacros(*this, II);

  if (needModuleMacros()) {
    // Track that we created a new macro directive, so we know we should
    // consider building a ModuleMacro for it when we get to the end of
    // the module.
    PendingModuleMacroNames.push_back(II);
  }

  // Set up the identifier as having associated macro history.
  II->setHasMacroDefinition(true);
  // An #undef with no surviving module macros means the name currently has
  // no definition at all.
  if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
    II->setHasMacroDefinition(false);
  if (II->isFromAST())
    II->setChangedSinceDeserialization();
}

/// Install a complete macro-directive chain loaded from a PCH/module file.
/// \param II the macro name.
/// \param ED the earliest directive in the loaded chain (used for splicing).
/// \param MD the latest directive in the loaded chain.
void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
                                           MacroDirective *ED,
                                           MacroDirective *MD) {
  // Normally, when a macro is defined, it goes through appendMacroDirective()
  // above, which chains a macro to previous defines, undefs, etc.
  // However, in a pch, the whole macro history up to the end of the pch is
  // stored, so ASTReader goes through this function instead.
  // However, built-in macros are already registered in the Preprocessor
  // ctor, and ASTWriter stops writing the macro chain at built-in macros,
  // so in that case the chain from the pch needs to be spliced to the existing
  // built-in.

  assert(II && MD);
  MacroState &StoredMD = CurSubmoduleState->Macros[II];

  if (auto *OldMD = StoredMD.getLatest()) {
    // shouldIgnoreMacro() in ASTWriter also stops at macros from the
    // predefines buffer in module builds. However, in module builds, modules
    // are loaded completely before predefines are processed, so StoredMD
    // will be nullptr for them when they're loaded. StoredMD should only be
    // non-nullptr for builtins read from a pch file.
    assert(OldMD->getMacroInfo()->isBuiltinMacro() &&
           "only built-ins should have an entry here");
    assert(!OldMD->getPrevious() && "builtin should only have a single entry");
    // Splice the loaded chain on top of the existing builtin directive.
    ED->setPrevious(OldMD);
    StoredMD.setLatest(MD);
  } else {
    StoredMD = MD;
  }

  // Setup the identifier as having associated macro history.
  II->setHasMacroDefinition(true);
  if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
    II->setHasMacroDefinition(false);
}

/// Register a ModuleMacro describing how module \p Mod (re)defines or undefs
/// \p II, overriding the given set of macros from other modules.
/// \param New set to true if a new ModuleMacro was created, false if one
///        already existed for this (module, identifier) pair.
/// \returns the (possibly pre-existing) ModuleMacro node.
ModuleMacro *Preprocessor::addModuleMacro(Module *Mod, IdentifierInfo *II,
                                          MacroInfo *Macro,
                                          ArrayRef<ModuleMacro *> Overrides,
                                          bool &New) {
  // ModuleMacros are uniqued by (module, identifier) in a FoldingSet.
  llvm::FoldingSetNodeID ID;
  ModuleMacro::Profile(ID, Mod, II);

  void *InsertPos;
  if (auto *MM = ModuleMacros.FindNodeOrInsertPos(ID, InsertPos)) {
    New = false;
    return MM;
  }

  auto *MM = ModuleMacro::create(*this, Mod, II, Macro, Overrides);
  ModuleMacros.InsertNode(MM, InsertPos);

  // Each overridden macro is now overridden by one more macro.
  bool HidAny = false;
  for (auto *O : Overrides) {
    HidAny |= (O->NumOverriddenBy == 0);
    ++O->NumOverriddenBy;
  }

  // If we were the first overrider for any macro, it's no longer a leaf.
  auto &LeafMacros = LeafModuleMacros[II];
  if (HidAny) {
    LeafMacros.erase(std::remove_if(LeafMacros.begin(), LeafMacros.end(),
                                    [](ModuleMacro *MM) {
                                      return MM->NumOverriddenBy != 0;
                                    }),
                     LeafMacros.end());
  }

  // The new macro is always a leaf macro.
  LeafMacros.push_back(MM);
  // The identifier now has defined macros (that may or may not be visible).
  II->setHasMacroDefinition(true);

  New = true;
  return MM;
}

/// Look up the ModuleMacro for \p II owned by \p Mod, or null if none has
/// been registered.
ModuleMacro *Preprocessor::getModuleMacro(Module *Mod, IdentifierInfo *II) {
  llvm::FoldingSetNodeID ID;
  ModuleMacro::Profile(ID, Mod, II);

  void *InsertPos;
  return ModuleMacros.FindNodeOrInsertPos(ID, InsertPos);
}

/// Recompute which module macros for \p II are active (visible and not
/// overridden) after the set of visible modules has changed, and decide
/// whether the name's definition is ambiguous.
void Preprocessor::updateModuleMacroInfo(const IdentifierInfo *II,
                                         ModuleMacroInfo &Info) {
  assert(Info.ActiveModuleMacrosGeneration !=
             CurSubmoduleState->VisibleModules.getGeneration() &&
         "don't need to update this macro name info");
  // Stamp the info with the current visibility generation so callers can
  // skip this recomputation until visibility changes again.
  Info.ActiveModuleMacrosGeneration =
      CurSubmoduleState->VisibleModules.getGeneration();

  auto Leaf = LeafModuleMacros.find(II);
  if (Leaf == LeafModuleMacros.end()) {
    // No imported macros at all: nothing to do.
    return;
  }

  Info.ActiveModuleMacros.clear();

  // Every macro that's locally overridden is overridden by a visible macro.
  // A count of -1 marks macros overridden by the local directive itself.
  llvm::DenseMap<ModuleMacro *, int> NumHiddenOverrides;
  for (auto *O : Info.OverriddenMacros)
    NumHiddenOverrides[O] = -1;

  // Collect all macros that are not overridden by a visible macro.
  llvm::SmallVector<ModuleMacro *, 16> Worklist;
  for (auto *LeafMM : Leaf->second) {
    assert(LeafMM->getNumOverridingMacros() == 0 && "leaf macro overridden");
    if (NumHiddenOverrides.lookup(LeafMM) == 0)
      Worklist.push_back(LeafMM);
  }
  while (!Worklist.empty()) {
    auto *MM = Worklist.pop_back_val();
    if (CurSubmoduleState->VisibleModules.isVisible(MM->getOwningModule())) {
      // We only care about collecting definitions; undefinitions only act
      // to override other definitions.
      if (MM->getMacroInfo())
        Info.ActiveModuleMacros.push_back(MM);
    } else {
      // An invisible macro doesn't hide the ones it overrides; once all of a
      // macro's overriders are known to be hidden, it becomes a candidate.
      for (auto *O : MM->overrides())
        if ((unsigned)++NumHiddenOverrides[O] == O->getNumOverridingMacros())
          Worklist.push_back(O);
    }
  }
  // Our reverse postorder walk found the macros in reverse order.
  std::reverse(Info.ActiveModuleMacros.begin(), Info.ActiveModuleMacros.end());

  // Determine whether the macro name is ambiguous.
  MacroInfo *MI = nullptr;
  bool IsSystemMacro = true;
  bool IsAmbiguous = false;
  if (auto *MD = Info.MD) {
    // Skip visibility directives; only a #define/#undef contributes a
    // definition to compare against.
    while (MD && isa<VisibilityMacroDirective>(MD))
      MD = MD->getPrevious();
    if (auto *DMD = dyn_cast_or_null<DefMacroDirective>(MD)) {
      MI = DMD->getInfo();
      IsSystemMacro &= SourceMgr.isInSystemHeader(DMD->getLocation());
    }
  }
  for (auto *Active : Info.ActiveModuleMacros) {
    auto *NewMI = Active->getMacroInfo();

    // Before marking the macro as ambiguous, check if this is a case where
    // both macros are in system headers. If so, we trust that the system
    // did not get it wrong. This also handles cases where Clang's own
    // headers have a different spelling of certain system macros:
    //   #define LONG_MAX __LONG_MAX__ (clang's limits.h)
    //   #define LONG_MAX 0x7fffffffffffffffL (system's limits.h)
    //
    // FIXME: Remove the defined-in-system-headers check. clang's limits.h
    // overrides the system limits.h's macros, so there's no conflict here.
    if (MI && NewMI != MI &&
        !MI->isIdenticalTo(*NewMI, *this, /*Syntactically=*/true))
      IsAmbiguous = true;
    IsSystemMacro &= Active->getOwningModule()->IsSystem ||
                     SourceMgr.isInSystemHeader(NewMI->getDefinitionLoc());
    MI = NewMI;
  }
  // Conflicts confined entirely to system headers are deliberately ignored.
  Info.IsAmbiguous = IsAmbiguous && !IsSystemMacro;
}

/// Debugging dump: print the local directive history and the module-macro
/// override graph for \p II to stderr.
void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
  ArrayRef<ModuleMacro*> Leaf;
  auto LeafIt = LeafModuleMacros.find(II);
  if (LeafIt != LeafModuleMacros.end())
    Leaf = LeafIt->second;
  const MacroState *State = nullptr;
  auto Pos = CurSubmoduleState->Macros.find(II);
  if (Pos != CurSubmoduleState->Macros.end())
    State = &Pos->second;

  llvm::errs() << "MacroState " << State << " " << II->getNameStart();
  if (State && State->isAmbiguous(*this, II))
    llvm::errs() << " ambiguous";
  if (State && !State->getOverriddenMacros().empty()) {
    llvm::errs() << " overrides";
    for (auto *O : State->getOverriddenMacros())
      llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
  }
  llvm::errs() << "\n";

  // Dump local macro directives.
  for (auto *MD = State ? State->getLatest() : nullptr; MD;
       MD = MD->getPrevious()) {
    llvm::errs() << " ";
    MD->dump();
  }

  // Dump module macros.
  llvm::DenseSet<ModuleMacro*> Active;
  for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None)
    Active.insert(MM);
  llvm::DenseSet<ModuleMacro*> Visited;
  llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf.begin(), Leaf.end());
  while (!Worklist.empty()) {
    auto *MM = Worklist.pop_back_val();
    llvm::errs() << " ModuleMacro " << MM << " "
                 << MM->getOwningModule()->getFullModuleName();
    if (!MM->getMacroInfo())
      llvm::errs() << " undef";

    if (Active.count(MM))
      llvm::errs() << " active";
    else if (!CurSubmoduleState->VisibleModules.isVisible(
                 MM->getOwningModule()))
      llvm::errs() << " hidden";
    else if (MM->getMacroInfo())
      llvm::errs() << " overridden";

    if (!MM->overrides().empty()) {
      llvm::errs() << " overrides";
      for (auto *O : MM->overrides()) {
        llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
        if (Visited.insert(O).second)
          Worklist.push_back(O);
      }
    }
    llvm::errs() << "\n";
    if (auto *MI = MM->getMacroInfo()) {
      llvm::errs() << "  ";
      MI->dump();
      llvm::errs() << "\n";
    }
  }
}

/// RegisterBuiltinMacro - Register the specified identifier in the identifier
/// table and mark it as a builtin macro to be expanded.
static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
  // Get the identifier.
  IdentifierInfo *Id = PP.getIdentifierInfo(Name);

  // Mark it as being a macro that is builtin.
  MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
  MI->setIsBuiltinMacro();
  PP.appendDefMacroDirective(Id, MI);
  return Id;
}

/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
/// identifier table.
void Preprocessor::RegisterBuiltinMacros() {
  Ident__LINE__ = RegisterBuiltinMacro(*this, "__LINE__");
  Ident__FILE__ = RegisterBuiltinMacro(*this, "__FILE__");
  Ident__DATE__ = RegisterBuiltinMacro(*this, "__DATE__");
  Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__");
  Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__");
  Ident_Pragma  = RegisterBuiltinMacro(*this, "_Pragma");

  // C++ Standing Document Extensions.
  if (LangOpts.CPlusPlus)
    Ident__has_cpp_attribute =
        RegisterBuiltinMacro(*this, "__has_cpp_attribute");
  else
    Ident__has_cpp_attribute = nullptr;

  // GCC Extensions.
  Ident__BASE_FILE__     = RegisterBuiltinMacro(*this, "__BASE_FILE__");
  Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro(*this, "__INCLUDE_LEVEL__");
  Ident__TIMESTAMP__     = RegisterBuiltinMacro(*this, "__TIMESTAMP__");

  // Microsoft Extensions.
  if (LangOpts.MicrosoftExt) {
    Ident__identifier = RegisterBuiltinMacro(*this, "__identifier");
    Ident__pragma = RegisterBuiltinMacro(*this, "__pragma");
  } else {
    Ident__identifier = nullptr;
    Ident__pragma = nullptr;
  }

  // Clang Extensions.
  Ident__FILE_NAME__      = RegisterBuiltinMacro(*this, "__FILE_NAME__");
  Ident__has_feature      = RegisterBuiltinMacro(*this, "__has_feature");
  Ident__has_extension    = RegisterBuiltinMacro(*this, "__has_extension");
  Ident__has_builtin      = RegisterBuiltinMacro(*this, "__has_builtin");
  Ident__has_attribute    = RegisterBuiltinMacro(*this, "__has_attribute");
  Ident__has_c_attribute  = RegisterBuiltinMacro(*this, "__has_c_attribute");
  Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
  Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
  Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
  Ident__has_warning      = RegisterBuiltinMacro(*this, "__has_warning");
  Ident__is_identifier    = RegisterBuiltinMacro(*this, "__is_identifier");
  Ident__is_target_arch   = RegisterBuiltinMacro(*this, "__is_target_arch");
  Ident__is_target_vendor = RegisterBuiltinMacro(*this, "__is_target_vendor");
  Ident__is_target_os     = RegisterBuiltinMacro(*this, "__is_target_os");
  Ident__is_target_environment =
      RegisterBuiltinMacro(*this, "__is_target_environment");

  // Modules.
  Ident__building_module  = RegisterBuiltinMacro(*this, "__building_module");
  // __MODULE__ only exists while compiling a named module.
  if (!LangOpts.CurrentModule.empty())
    Ident__MODULE__ = RegisterBuiltinMacro(*this, "__MODULE__");
  else
    Ident__MODULE__ = nullptr;
}

/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
/// in its expansion, currently expands to that token literally.
static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
                                          const IdentifierInfo *MacroIdent,
                                          Preprocessor &PP) {
  IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();

  // If the token isn't an identifier, it's always literally expanded.
  if (!II) return true;

  // If the information about this identifier is out of date, update it from
  // the external source.
  if (II->isOutOfDate())
    PP.getExternalSource()->updateOutOfDateIdentifier(*II);

  // If the identifier is a macro, and if that macro is enabled, it may be
  // expanded so it's not a trivial expansion.
  if (auto *ExpansionMI = PP.getMacroInfo(II))
    if (ExpansionMI->isEnabled() &&
        // Fast expanding "#define X X" is ok, because X would be disabled.
        II != MacroIdent)
      return false;

  // If this is an object-like macro invocation, it is safe to trivially expand
  // it.
  if (MI->isObjectLike()) return true;

  // If this is a function-like macro invocation, it's safe to trivially expand
  // as long as the identifier is not a macro argument.
  return std::find(MI->param_begin(), MI->param_end(), II) == MI->param_end();
}

/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
/// lexed is a '('.  If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool Preprocessor::isNextPPTokenLParen() {
  // Do some quick tests for rejection cases.
  // Val encoding: 1 = next token is '(', 0 = it is not, 2 = ran off the end
  // of the current lexer's buffer.
  unsigned Val;
  if (CurLexer)
    Val = CurLexer->isNextPPTokenLParen();
  else
    Val = CurTokenLexer->isNextTokenLParen();

  if (Val == 2) {
    // We have run off the end.  If it's a source file we don't
    // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
    // macro stack.
    if (CurPPLexer)
      return false;
    for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
      if (Entry.TheLexer)
        Val = Entry.TheLexer->isNextPPTokenLParen();
      else
        Val = Entry.TheTokenLexer->isNextTokenLParen();

      if (Val != 2)
        break;

      // Ran off the end of a source file?
      if (Entry.ThePPLexer)
        return false;
    }
  }

  // Okay, if we know that the token is a '(', lex it and return.  Otherwise we
  // have found something that isn't a '(' or we found the end of the
  // translation unit.  In either case, return false.
  return Val == 1;
}

/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
/// expanded as a macro, handle it and return the next token as 'Identifier'.
/// \returns true if the caller should lex the next token itself (the result
/// was produced in place or an error occurred), false if a macro-expansion
/// context was pushed and the caller should re-lex.
bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
                                                 const MacroDefinition &M) {
  MacroInfo *MI = M.getMacroInfo();

  // If this is a macro expansion in the "#if !defined(x)" line for the file,
  // then the macro could expand to different things in other contexts, we need
  // to disable the optimization in this case.
  if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();

  // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
  if (MI->isBuiltinMacro()) {
    if (Callbacks)
      Callbacks->MacroExpands(Identifier, M, Identifier.getLocation(),
                              /*Args=*/nullptr);
    ExpandBuiltinMacro(Identifier);
    return true;
  }

  /// Args - If this is a function-like macro expansion, this contains,
  /// for each macro argument, the list of tokens that were provided to the
  /// invocation.
  MacroArgs *Args = nullptr;

  // Remember where the end of the expansion occurred.  For an object-like
  // macro, this is the identifier.  For a function-like macro, this is the ')'.
  SourceLocation ExpansionEnd = Identifier.getLocation();

  // If this is a function-like macro, read the arguments.
  if (MI->isFunctionLike()) {
    // Remember that we are now parsing the arguments to a macro invocation.
    // Preprocessor directives used inside macro arguments are not portable, and
    // this enables the warning.
    InMacroArgs = true;
    ArgMacro = &Identifier;

    Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd);

    // Finished parsing args.
    InMacroArgs = false;
    ArgMacro = nullptr;

    // If there was an error parsing the arguments, bail out.
    if (!Args) return true;

    ++NumFnMacroExpanded;
  } else {
    ++NumMacroExpanded;
  }

  // Notice that this macro has been used.
  markMacroAsUsed(MI);

  // Remember where the token is expanded.
  SourceLocation ExpandLoc = Identifier.getLocation();
  SourceRange ExpansionRange(ExpandLoc, ExpansionEnd);

  if (Callbacks) {
    if (InMacroArgs) {
      // We can have macro expansion inside a conditional directive while
      // reading the function macro arguments. To ensure, in that case, that
      // MacroExpands callbacks still happen in source order, queue this
      // callback to have it happen after the function macro callback.
      DelayedMacroExpandsCallbacks.push_back(
          MacroExpandsInfo(Identifier, M, ExpansionRange));
    } else {
      Callbacks->MacroExpands(Identifier, M, ExpansionRange, Args);
      if (!DelayedMacroExpandsCallbacks.empty()) {
        for (const MacroExpandsInfo &Info : DelayedMacroExpandsCallbacks) {
          // FIXME: We lose macro args info with delayed callback.
          Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range,
                                  /*Args=*/nullptr);
        }
        DelayedMacroExpandsCallbacks.clear();
      }
    }
  }

  // If the macro definition is ambiguous, complain.
  if (M.isAmbiguous()) {
    Diag(Identifier, diag::warn_pp_ambiguous_macro)
      << Identifier.getIdentifierInfo();
    Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
      << Identifier.getIdentifierInfo();
    M.forAllDefinitions([&](const MacroInfo *OtherMI) {
      if (OtherMI != MI)
        Diag(OtherMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
          << Identifier.getIdentifierInfo();
    });
  }

  // If we started lexing a macro, enter the macro expansion body.

  // If this macro expands to no tokens, don't bother to push it onto the
  // expansion stack, only to take it right back off.
  if (MI->getNumTokens() == 0) {
    // No need for arg info.
    if (Args) Args->destroy(*this);

    // Propagate whitespace info as if we had pushed, then popped,
    // a macro context.
    Identifier.setFlag(Token::LeadingEmptyMacro);
    PropagateLineStartLeadingSpaceInfo(Identifier);
    ++NumFastMacroExpanded;
    return false;
  } else if (MI->getNumTokens() == 1 &&
             isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
                                           *this)) {
    // Otherwise, if this macro expands into a single trivially-expanded
    // token: expand it now.  This handles common cases like
    // "#define VAL 42".

    // No need for arg info.
    if (Args) Args->destroy(*this);

    // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
    // identifier to the expanded token.
    bool isAtStartOfLine = Identifier.isAtStartOfLine();
    bool hasLeadingSpace = Identifier.hasLeadingSpace();

    // Replace the result token.
    Identifier = MI->getReplacementToken(0);

    // Restore the StartOfLine/LeadingSpace markers.
    Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
    Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);

    // Update the tokens location to include both its expansion and physical
    // locations.
    SourceLocation Loc =
      SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc,
                                   ExpansionEnd,Identifier.getLength());
    Identifier.setLocation(Loc);

    // If this is a disabled macro or #define X X, we must mark the result as
    // unexpandable.
    if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) {
      if (MacroInfo *NewMI = getMacroInfo(NewII))
        if (!NewMI->isEnabled() || NewMI == MI) {
          Identifier.setFlag(Token::DisableExpand);
          // Don't warn for "#define X X" like "#define bool bool" from
          // stdbool.h.
          if (NewMI != MI || MI->isFunctionLike())
            Diag(Identifier, diag::pp_disabled_macro_expansion);
        }
    }

    // Since this is not an identifier token, it can't be macro expanded, so
    // we're done.
    ++NumFastMacroExpanded;
    return true;
  }

  // Start expanding the macro.
  EnterMacro(Identifier, ExpansionEnd, MI, Args);
  return false;
}

// Bracket kinds tracked when checking that macro-argument token streams have
// properly nested delimiters.
enum Bracket {
  Brace,
  Paren
};

/// CheckMatchedBrackets - Returns true if the braces and parentheses in the
/// token vector are properly nested.
static bool CheckMatchedBrackets(const SmallVectorImpl<Token> &Tokens) {
  // Stack of currently-open brackets; mismatched or crossed pairs fail fast.
  SmallVector<Bracket, 8> Brackets;
  for (SmallVectorImpl<Token>::const_iterator I = Tokens.begin(),
                                              E = Tokens.end();
       I != E; ++I) {
    if (I->is(tok::l_paren)) {
      Brackets.push_back(Paren);
    } else if (I->is(tok::r_paren)) {
      if (Brackets.empty() || Brackets.back() == Brace)
        return false;
      Brackets.pop_back();
    } else if (I->is(tok::l_brace)) {
      Brackets.push_back(Brace);
    } else if (I->is(tok::r_brace)) {
      if (Brackets.empty() || Brackets.back() == Paren)
        return false;
      Brackets.pop_back();
    }
  }
  // Leftover open brackets also count as a mismatch.
  return Brackets.empty();
}

/// GenerateNewArgTokens - Returns true if OldTokens can be converted to a new
/// vector of tokens in NewTokens.  The new number of arguments will be placed
/// in NumArgs and the ranges which need to surrounded in parentheses will be
/// in ParenHints.
/// Returns false if the token stream cannot be changed.  If this is because
/// of an initializer list starting a macro argument, the range of those
/// initializer lists will be place in InitLists.
static bool GenerateNewArgTokens(Preprocessor &PP,
                                 SmallVectorImpl<Token> &OldTokens,
                                 SmallVectorImpl<Token> &NewTokens,
                                 unsigned &NumArgs,
                                 SmallVectorImpl<SourceRange> &ParenHints,
                                 SmallVectorImpl<SourceRange> &InitLists) {
  if (!CheckMatchedBrackets(OldTokens))
    return false;

  // Once it is known that the brackets are matched, only a simple count of the
  // braces is needed.
  unsigned Braces = 0;

  // First token of a new macro argument.
  SmallVectorImpl<Token>::iterator ArgStartIterator = OldTokens.begin();

  // First closing brace in a new macro argument.  Used to generate
  // SourceRanges for InitLists.
  SmallVectorImpl<Token>::iterator ClosingBrace = OldTokens.end();
  NumArgs = 0;
  Token TempToken;
  // Set to true when a macro separator token is found inside a braced list.
  // If true, the fixed argument spans multiple old arguments and ParenHints
  // will be updated.
  bool FoundSeparatorToken = false;
  for (SmallVectorImpl<Token>::iterator I = OldTokens.begin(),
                                        E = OldTokens.end();
       I != E; ++I) {
    if (I->is(tok::l_brace)) {
      ++Braces;
    } else if (I->is(tok::r_brace)) {
      --Braces;
      if (Braces == 0 && ClosingBrace == E && FoundSeparatorToken)
        ClosingBrace = I;
    } else if (I->is(tok::eof)) {
      // EOF token is used to separate macro arguments
      if (Braces != 0) {
        // Assume comma separator is actually braced list separator and change
        // it back to a comma.
        FoundSeparatorToken = true;
        I->setKind(tok::comma);
        I->setLength(1);
      } else { // Braces == 0
        // Separator token still separates arguments.
        ++NumArgs;

        // If the argument starts with a brace, it can't be fixed with
        // parentheses.  A different diagnostic will be given.
        if (FoundSeparatorToken && ArgStartIterator->is(tok::l_brace)) {
          InitLists.push_back(
              SourceRange(ArgStartIterator->getLocation(),
                          PP.getLocForEndOfToken(ClosingBrace->getLocation())));
          ClosingBrace = E;
        }

        // Add left paren
        if (FoundSeparatorToken) {
          TempToken.startToken();
          TempToken.setKind(tok::l_paren);
          TempToken.setLocation(ArgStartIterator->getLocation());
          TempToken.setLength(0);
          NewTokens.push_back(TempToken);
        }

        // Copy over argument tokens
        NewTokens.insert(NewTokens.end(), ArgStartIterator, I);

        // Add right paren and store the paren locations in ParenHints
        if (FoundSeparatorToken) {
          SourceLocation Loc = PP.getLocForEndOfToken((I - 1)->getLocation());
          TempToken.startToken();
          TempToken.setKind(tok::r_paren);
          TempToken.setLocation(Loc);
          TempToken.setLength(0);
          NewTokens.push_back(TempToken);
          ParenHints.push_back(SourceRange(ArgStartIterator->getLocation(),
                                           Loc));
        }

        // Copy separator token
        NewTokens.push_back(*I);

        // Reset values
        ArgStartIterator = I + 1;
        FoundSeparatorToken = false;
      }
    }
  }

  // Only report success if we actually have a paren fix-it to suggest and no
  // unfixable initializer-list arguments were encountered.
  return !ParenHints.empty() && InitLists.empty();
}

/// ReadFunctionLikeMacroArgs - After reading "MACRO" and knowing that the next
/// token is the '(' of the macro, this method is invoked to read all of the
/// actual arguments specified for the macro invocation.  This returns null on
/// error.
MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName,
                                                   MacroInfo *MI,
                                                   SourceLocation &MacroEnd) {
  // The number of fixed arguments to parse.
+  unsigned NumFixedArgsLeft = MI->getNumParams(); +  bool isVariadic = MI->isVariadic(); + +  // Outer loop, while there are more arguments, keep reading them. +  Token Tok; + +  // Read arguments as unexpanded tokens.  This avoids issues, e.g., where +  // an argument value in a macro could expand to ',' or '(' or ')'. +  LexUnexpandedToken(Tok); +  assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?"); + +  // ArgTokens - Build up a list of tokens that make up each argument.  Each +  // argument is separated by an EOF token.  Use a SmallVector so we can avoid +  // heap allocations in the common case. +  SmallVector<Token, 64> ArgTokens; +  bool ContainsCodeCompletionTok = false; +  bool FoundElidedComma = false; + +  SourceLocation TooManyArgsLoc; + +  unsigned NumActuals = 0; +  while (Tok.isNot(tok::r_paren)) { +    if (ContainsCodeCompletionTok && Tok.isOneOf(tok::eof, tok::eod)) +      break; + +    assert(Tok.isOneOf(tok::l_paren, tok::comma) && +           "only expect argument separators here"); + +    size_t ArgTokenStart = ArgTokens.size(); +    SourceLocation ArgStartLoc = Tok.getLocation(); + +    // C99 6.10.3p11: Keep track of the number of l_parens we have seen.  Note +    // that we already consumed the first one. +    unsigned NumParens = 0; + +    while (true) { +      // Read arguments as unexpanded tokens.  This avoids issues, e.g., where +      // an argument value in a macro could expand to ',' or '(' or ')'. +      LexUnexpandedToken(Tok); + +      if (Tok.isOneOf(tok::eof, tok::eod)) { // "#if f(<eof>" & "#if f(\n" +        if (!ContainsCodeCompletionTok) { +          Diag(MacroName, diag::err_unterm_macro_invoc); +          Diag(MI->getDefinitionLoc(), diag::note_macro_here) +            << MacroName.getIdentifierInfo(); +          // Do not lose the EOF/EOD.  Return it to the client. +          MacroName = Tok; +          return nullptr; +        } +        // Do not lose the EOF/EOD. 
+        auto Toks = std::make_unique<Token[]>(1); +        Toks[0] = Tok; +        EnterTokenStream(std::move(Toks), 1, true, /*IsReinject*/ false); +        break; +      } else if (Tok.is(tok::r_paren)) { +        // If we found the ) token, the macro arg list is done. +        if (NumParens-- == 0) { +          MacroEnd = Tok.getLocation(); +          if (!ArgTokens.empty() && +              ArgTokens.back().commaAfterElided()) { +            FoundElidedComma = true; +          } +          break; +        } +      } else if (Tok.is(tok::l_paren)) { +        ++NumParens; +      } else if (Tok.is(tok::comma) && NumParens == 0 && +                 !(Tok.getFlags() & Token::IgnoredComma)) { +        // In Microsoft-compatibility mode, single commas from nested macro +        // expansions should not be considered as argument separators. We test +        // for this with the IgnoredComma token flag above. + +        // Comma ends this argument if there are more fixed arguments expected. +        // However, if this is a variadic macro, and this is part of the +        // variadic part, then the comma is just an argument token. +        if (!isVariadic) break; +        if (NumFixedArgsLeft > 1) +          break; +      } else if (Tok.is(tok::comment) && !KeepMacroComments) { +        // If this is a comment token in the argument list and we're just in +        // -C mode (not -CC mode), discard the comment. +        continue; +      } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { +        // Reading macro arguments can cause macros that we are currently +        // expanding from to be popped off the expansion stack.  Doing so causes +        // them to be reenabled for expansion.  Here we record whether any +        // identifiers we lex as macro arguments correspond to disabled macros. +        // If so, we mark the token as noexpand.  This is a subtle aspect of +        // C99 6.10.3.4p2. 
+        if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo())) +          if (!MI->isEnabled()) +            Tok.setFlag(Token::DisableExpand); +      } else if (Tok.is(tok::code_completion)) { +        ContainsCodeCompletionTok = true; +        if (CodeComplete) +          CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(), +                                                  MI, NumActuals); +        // Don't mark that we reached the code-completion point because the +        // parser is going to handle the token and there will be another +        // code-completion callback. +      } + +      ArgTokens.push_back(Tok); +    } + +    // If this was an empty argument list foo(), don't add this as an empty +    // argument. +    if (ArgTokens.empty() && Tok.getKind() == tok::r_paren) +      break; + +    // If this is not a variadic macro, and too many args were specified, emit +    // an error. +    if (!isVariadic && NumFixedArgsLeft == 0 && TooManyArgsLoc.isInvalid()) { +      if (ArgTokens.size() != ArgTokenStart) +        TooManyArgsLoc = ArgTokens[ArgTokenStart].getLocation(); +      else +        TooManyArgsLoc = ArgStartLoc; +    } + +    // Empty arguments are standard in C99 and C++0x, and are supported as an +    // extension in other modes. +    if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99) +      Diag(Tok, LangOpts.CPlusPlus11 ? +           diag::warn_cxx98_compat_empty_fnmacro_arg : +           diag::ext_empty_fnmacro_arg); + +    // Add a marker EOF token to the end of the token list for this argument. +    Token EOFTok; +    EOFTok.startToken(); +    EOFTok.setKind(tok::eof); +    EOFTok.setLocation(Tok.getLocation()); +    EOFTok.setLength(0); +    ArgTokens.push_back(EOFTok); +    ++NumActuals; +    if (!ContainsCodeCompletionTok && NumFixedArgsLeft != 0) +      --NumFixedArgsLeft; +  } + +  // Okay, we either found the r_paren.  Check to see if we parsed too few +  // arguments. 
+  unsigned MinArgsExpected = MI->getNumParams(); + +  // If this is not a variadic macro, and too many args were specified, emit +  // an error. +  if (!isVariadic && NumActuals > MinArgsExpected && +      !ContainsCodeCompletionTok) { +    // Emit the diagnostic at the macro name in case there is a missing ). +    // Emitting it at the , could be far away from the macro name. +    Diag(TooManyArgsLoc, diag::err_too_many_args_in_macro_invoc); +    Diag(MI->getDefinitionLoc(), diag::note_macro_here) +      << MacroName.getIdentifierInfo(); + +    // Commas from braced initializer lists will be treated as argument +    // separators inside macros.  Attempt to correct for this with parentheses. +    // TODO: See if this can be generalized to angle brackets for templates +    // inside macro arguments. + +    SmallVector<Token, 4> FixedArgTokens; +    unsigned FixedNumArgs = 0; +    SmallVector<SourceRange, 4> ParenHints, InitLists; +    if (!GenerateNewArgTokens(*this, ArgTokens, FixedArgTokens, FixedNumArgs, +                              ParenHints, InitLists)) { +      if (!InitLists.empty()) { +        DiagnosticBuilder DB = +            Diag(MacroName, +                 diag::note_init_list_at_beginning_of_macro_argument); +        for (SourceRange Range : InitLists) +          DB << Range; +      } +      return nullptr; +    } +    if (FixedNumArgs != MinArgsExpected) +      return nullptr; + +    DiagnosticBuilder DB = Diag(MacroName, diag::note_suggest_parens_for_macro); +    for (SourceRange ParenLocation : ParenHints) { +      DB << FixItHint::CreateInsertion(ParenLocation.getBegin(), "("); +      DB << FixItHint::CreateInsertion(ParenLocation.getEnd(), ")"); +    } +    ArgTokens.swap(FixedArgTokens); +    NumActuals = FixedNumArgs; +  } + +  // See MacroArgs instance var for description of this. +  bool isVarargsElided = false; + +  if (ContainsCodeCompletionTok) { +    // Recover from not-fully-formed macro invocation during code-completion. 
+    Token EOFTok; +    EOFTok.startToken(); +    EOFTok.setKind(tok::eof); +    EOFTok.setLocation(Tok.getLocation()); +    EOFTok.setLength(0); +    for (; NumActuals < MinArgsExpected; ++NumActuals) +      ArgTokens.push_back(EOFTok); +  } + +  if (NumActuals < MinArgsExpected) { +    // There are several cases where too few arguments is ok, handle them now. +    if (NumActuals == 0 && MinArgsExpected == 1) { +      // #define A(X)  or  #define A(...)   ---> A() + +      // If there is exactly one argument, and that argument is missing, +      // then we have an empty "()" argument empty list.  This is fine, even if +      // the macro expects one argument (the argument is just empty). +      isVarargsElided = MI->isVariadic(); +    } else if ((FoundElidedComma || MI->isVariadic()) && +               (NumActuals+1 == MinArgsExpected ||  // A(x, ...) -> A(X) +                (NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A() +      // Varargs where the named vararg parameter is missing: OK as extension. +      //   #define A(x, ...) +      //   A("blah") +      // +      // If the macro contains the comma pasting extension, the diagnostic +      // is suppressed; we know we'll get another diagnostic later. +      if (!MI->hasCommaPasting()) { +        Diag(Tok, diag::ext_missing_varargs_arg); +        Diag(MI->getDefinitionLoc(), diag::note_macro_here) +          << MacroName.getIdentifierInfo(); +      } + +      // Remember this occurred, allowing us to elide the comma when used for +      // cases like: +      //   #define A(x, foo...) blah(a, ## foo) +      //   #define B(x, ...) blah(a, ## __VA_ARGS__) +      //   #define C(...) blah(a, ## __VA_ARGS__) +      //  A(x) B(x) C() +      isVarargsElided = true; +    } else if (!ContainsCodeCompletionTok) { +      // Otherwise, emit the error. 
+      Diag(Tok, diag::err_too_few_args_in_macro_invoc); +      Diag(MI->getDefinitionLoc(), diag::note_macro_here) +        << MacroName.getIdentifierInfo(); +      return nullptr; +    } + +    // Add a marker EOF token to the end of the token list for this argument. +    SourceLocation EndLoc = Tok.getLocation(); +    Tok.startToken(); +    Tok.setKind(tok::eof); +    Tok.setLocation(EndLoc); +    Tok.setLength(0); +    ArgTokens.push_back(Tok); + +    // If we expect two arguments, add both as empty. +    if (NumActuals == 0 && MinArgsExpected == 2) +      ArgTokens.push_back(Tok); + +  } else if (NumActuals > MinArgsExpected && !MI->isVariadic() && +             !ContainsCodeCompletionTok) { +    // Emit the diagnostic at the macro name in case there is a missing ). +    // Emitting it at the , could be far away from the macro name. +    Diag(MacroName, diag::err_too_many_args_in_macro_invoc); +    Diag(MI->getDefinitionLoc(), diag::note_macro_here) +      << MacroName.getIdentifierInfo(); +    return nullptr; +  } + +  return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this); +} + +/// Keeps macro expanded tokens for TokenLexers. +// +/// Works like a stack; a TokenLexer adds the macro expanded tokens that is +/// going to lex in the cache and when it finishes the tokens are removed +/// from the end of the cache. +Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer, +                                              ArrayRef<Token> tokens) { +  assert(tokLexer); +  if (tokens.empty()) +    return nullptr; + +  size_t newIndex = MacroExpandedTokens.size(); +  bool cacheNeedsToGrow = tokens.size() > +                      MacroExpandedTokens.capacity()-MacroExpandedTokens.size(); +  MacroExpandedTokens.append(tokens.begin(), tokens.end()); + +  if (cacheNeedsToGrow) { +    // Go through all the TokenLexers whose 'Tokens' pointer points in the +    // buffer and update the pointers to the (potential) new buffer array. 
    // The append above may have reallocated the cache's storage.  Any
    // TokenLexer already reading out of the cache now holds a dangling
    // 'Tokens' pointer; re-point each one at the new buffer using the start
    // index we recorded when it was pushed.
    for (const auto &Lexer : MacroExpandingLexersStack) {
      TokenLexer *prevLexer;
      size_t tokIndex;
      std::tie(prevLexer, tokIndex) = Lexer;
      prevLexer->Tokens = MacroExpandedTokens.data() + tokIndex;
    }
  }

  MacroExpandingLexersStack.push_back(std::make_pair(tokLexer, newIndex));
  return MacroExpandedTokens.data() + newIndex;
}

/// Pop the most recently cached macro-expanded tokens off the shared cache.
///
/// Must mirror the LIFO discipline of cacheMacroExpandedTokens: the lexer
/// being removed is required to be the one on top of the stack.
void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() {
  assert(!MacroExpandingLexersStack.empty());
  size_t tokIndex = MacroExpandingLexersStack.back().second;
  assert(tokIndex < MacroExpandedTokens.size());
  // Pop the cached macro expanded tokens from the end.  Shrinking (never
  // growing) the vector cannot invalidate the remaining lexers' pointers.
  MacroExpandedTokens.resize(tokIndex);
  MacroExpandingLexersStack.pop_back();
}

/// ComputeDATE_TIME - Compute the current time, enter it into the specified
/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
/// the identifier tokens inserted.
///
/// The two strings are formatted exactly as C99 6.10.8 requires for __DATE__
/// ("Mmm dd yyyy") and __TIME__ ("hh:mm:ss"), including the surrounding
/// double quotes.
static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
                             Preprocessor &PP) {
  // NOTE: localtime() returns a pointer to a shared static buffer and is not
  // thread-safe; this matches the single-threaded use of the preprocessor.
  time_t TT = time(nullptr);
  struct tm *TM = localtime(&TT);

  static const char * const Months[] = {
    "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
  };

  {
    SmallString<32> TmpBuffer;
    llvm::raw_svector_ostream TmpStream(TmpBuffer);
    // %2d: day-of-month is space-padded, per the standard's "Mmm dd yyyy".
    TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon],
                              TM->tm_mday, TM->tm_year + 1900);
    Token TmpTok;
    TmpTok.startToken();
    // CreateString copies the text into a scratch buffer and gives the token
    // a real SourceLocation we can hand back to the caller.
    PP.CreateString(TmpStream.str(), TmpTok);
    DATELoc = TmpTok.getLocation();
  }

  {
    SmallString<32> TmpBuffer;
    llvm::raw_svector_ostream TmpStream(TmpBuffer);
    TmpStream << llvm::format("\"%02d:%02d:%02d\"",
                              TM->tm_hour, TM->tm_min, TM->tm_sec);
    Token TmpTok;
    TmpTok.startToken();
    PP.CreateString(TmpStream.str(), TmpTok);
    TIMELoc = TmpTok.getLocation();
  }
}

/// HasFeature - Return true if we recognize and implement the feature
/// specified by the identifier as a standard language feature.
/// specified by the identifier as a standard language feature.
static bool HasFeature(const Preprocessor &PP, StringRef Feature) {
  // LangOpts is referenced by the predicates expanded from Features.def.
  const LangOptions &LangOpts = PP.getLangOpts();

  // Normalize the feature name, __foo__ becomes foo.
  if (Feature.startswith("__") && Feature.endswith("__") && Feature.size() >= 4)
    Feature = Feature.substr(2, Feature.size() - 4);

  // Each FEATURE(Name, Predicate) entry in Features.def expands to one .Case
  // of this string switch; unknown names fall through to false.
#define FEATURE(Name, Predicate) .Case(#Name, Predicate)
  return llvm::StringSwitch<bool>(Feature)
#include "clang/Basic/Features.def"
      .Default(false);
#undef FEATURE
}

/// HasExtension - Return true if we recognize and implement the feature
/// specified by the identifier, either as an extension or a standard language
/// feature.
static bool HasExtension(const Preprocessor &PP, StringRef Extension) {
  // Everything that is a feature is also an extension.
  if (HasFeature(PP, Extension))
    return true;

  // If the use of an extension results in an error diagnostic, extensions are
  // effectively unavailable, so just return false here.
  if (PP.getDiagnostics().getExtensionHandlingBehavior() >=
      diag::Severity::Error)
    return false;

  // LangOpts is referenced by the predicates expanded from Features.def.
  const LangOptions &LangOpts = PP.getLangOpts();

  // Normalize the extension name, __foo__ becomes foo.
  if (Extension.startswith("__") && Extension.endswith("__") &&
      Extension.size() >= 4)
    Extension = Extension.substr(2, Extension.size() - 4);

    // Because we inherit the feature list from HasFeature, this string switch
    // must be less restrictive than HasFeature's.
#define EXTENSION(Name, Predicate) .Case(#Name, Predicate)
  return llvm::StringSwitch<bool>(Extension)
#include "clang/Basic/Features.def"
      .Default(false);
#undef EXTENSION
}

/// EvaluateHasIncludeCommon - Process a '__has_include("path")'
/// or '__has_include_next("path")' expression.
/// Returns true if successful.
// Shared implementation of __has_include and __has_include_next.
// \p LookupFrom / \p LookupFromFile narrow the search start point for the
// _next variant; both are null for plain __has_include.
// NOTE: a 'false' return covers both "file not found" and every parse error
// below; callers distinguish the error case by checking for the ')' token.
static bool EvaluateHasIncludeCommon(Token &Tok,
                                     IdentifierInfo *II, Preprocessor &PP,
                                     const DirectoryLookup *LookupFrom,
                                     const FileEntry *LookupFromFile) {
  // Save the location of the current token.  If a '(' is later found, use
  // that location.  If not, use the end of this location instead.
  SourceLocation LParenLoc = Tok.getLocation();

  // These expressions are only allowed within a preprocessor directive.
  if (!PP.isParsingIfOrElifDirective()) {
    PP.Diag(LParenLoc, diag::err_pp_directive_required) << II;
    // Return a valid identifier token.
    assert(Tok.is(tok::identifier));
    Tok.setIdentifierInfo(II);
    return false;
  }

  // Get '('. If we don't have a '(', try to form a header-name token.  Lexing
  // in header-name mode is required so that <a/b.h> is one token, not a
  // less-than expression.  Comments may be returned in -CC mode; skip them.
  do {
    if (PP.LexHeaderName(Tok))
      return false;
  } while (Tok.getKind() == tok::comment);

  // Ensure we have a '('.
  if (Tok.isNot(tok::l_paren)) {
    // No '(', use end of last token.
    LParenLoc = PP.getLocForEndOfToken(LParenLoc);
    PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
    // If the next token looks like a filename or the start of one,
    // assume it is and process it as such.
    if (Tok.isNot(tok::header_name))
      return false;
  } else {
    // Save '(' location for possible missing ')' message.
    LParenLoc = Tok.getLocation();
    if (PP.LexHeaderName(Tok))
      return false;
  }

  if (Tok.isNot(tok::header_name)) {
    PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
    return false;
  }

  // Reserve a buffer to get the spelling.
  SmallString<128> FilenameBuffer;
  bool Invalid = false;
  StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
  if (Invalid)
    return false;

  SourceLocation FilenameLoc = Tok.getLocation();

  // Get ')'.
  PP.LexNonComment(Tok);

  // Ensure we have a trailing ).
  if (Tok.isNot(tok::r_paren)) {
    PP.Diag(PP.getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
        << II << tok::r_paren;
    PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
    return false;
  }

  // Determine <...> vs "..." form; this also strips the delimiters from
  // Filename in place.
  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
  // error.
  if (Filename.empty())
    return false;

  // Search include directories.
  const DirectoryLookup *CurDir;
  Optional<FileEntryRef> File =
      PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile,
                    CurDir, nullptr, nullptr, nullptr, nullptr, nullptr);

  // Let observers (e.g. dependency scanners) see the __has_include probe even
  // though no file is actually entered.
  if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
    SrcMgr::CharacteristicKind FileType = SrcMgr::C_User;
    if (File)
      FileType =
          PP.getHeaderSearchInfo().getFileDirFlavor(&File->getFileEntry());
    Callbacks->HasInclude(FilenameLoc, Filename, isAngled, File, FileType);
  }

  // Get the result value.  A result of true means the file exists.
  return File.hasValue();
}

/// EvaluateHasInclude - Process a '__has_include("path")' expression.
/// Returns true if successful.
static bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II,
                               Preprocessor &PP) {
  // Null lookup-start arguments mean: search from the beginning of the
  // include path, exactly like a plain #include.
  return EvaluateHasIncludeCommon(Tok, II, PP, nullptr, nullptr);
}

/// EvaluateHasIncludeNext - Process '__has_include_next("path")' expression.
/// Returns true if successful.
static bool EvaluateHasIncludeNext(Token &Tok,
                                   IdentifierInfo *II, Preprocessor &PP) {
  // __has_include_next is like __has_include, except that we start
  // searching after the current found directory.  If we can't do this,
  // issue a diagnostic.
  // FIXME: Factor out duplication with
  // Preprocessor::HandleIncludeNextDirective.
  const DirectoryLookup *Lookup = PP.GetCurDirLookup();
  const FileEntry *LookupFromFile = nullptr;
  if (PP.isInPrimaryFile() && PP.getLangOpts().IsHeaderFile) {
    // If the main file is a header, then it's either for PCH/AST generation,
    // or libclang opened it. Either way, handle it as a normal include below
    // and do not complain about __has_include_next.
  } else if (PP.isInPrimaryFile()) {
    // "include_next" in the top-level source file has nothing to be "next"
    // to; warn and fall back to an ordinary lookup.
    Lookup = nullptr;
    PP.Diag(Tok, diag::pp_include_next_in_primary);
  } else if (PP.getCurrentLexerSubmodule()) {
    // Start looking up in the directory *after* the one in which the current
    // file would be found, if any.
    assert(PP.getCurrentLexer() && "#include_next directive in macro?");
    LookupFromFile = PP.getCurrentLexer()->getFileEntry();
    Lookup = nullptr;
  } else if (!Lookup) {
    PP.Diag(Tok, diag::pp_include_next_absolute_path);
  } else {
    // Start looking up in the next directory.
    ++Lookup;
  }

  return EvaluateHasIncludeCommon(Tok, II, PP, Lookup, LookupFromFile);
}

/// Process single-argument builtin feature-like macros that return
/// integer values.
///
/// Handles the surrounding '(' ... ')' and error recovery; \p Op is invoked
/// on the argument token(s) to compute the integer result, which is printed
/// to \p OS and \p Tok is rewritten into a numeric_constant token.  Op may
/// set HasLexedNextTok when it has already consumed the following token.
static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
                                            Token &Tok, IdentifierInfo *II,
                                            Preprocessor &PP,
                                            llvm::function_ref<
                                              int(Token &Tok,
                                                  bool &HasLexedNextTok)> Op) {
  // Parse the initial '('.  Use unexpanded lexing: the argument of e.g.
  // __has_feature must not itself be macro-expanded.
  PP.LexUnexpandedToken(Tok);
  if (Tok.isNot(tok::l_paren)) {
    PP.Diag(Tok.getLocation(), diag::err_pp_expected_after) << II
                                                            << tok::l_paren;

    // Provide a dummy '0' value on output stream to elide further errors.
    if (!Tok.isOneOf(tok::eof, tok::eod)) {
      OS << 0;
      Tok.setKind(tok::numeric_constant);
    }
    return;
  }

  // Depth starts at 1 for the '(' just consumed; the matching ')' brings it
  // back to 0 and terminates the loop.
  unsigned ParenDepth = 1;
  SourceLocation LParenLoc = Tok.getLocation();
  llvm::Optional<int> Result;

  Token ResultTok;
  bool SuppressDiagnostic = false;
  while (true) {
    // Parse next token.
    PP.LexUnexpandedToken(Tok);

already_lexed:
    switch (Tok.getKind()) {
      case tok::eof:
      case tok::eod:
        // Don't provide even a dummy value if the eod or eof marker is
        // reached.  Simply provide a diagnostic.
        PP.Diag(Tok.getLocation(), diag::err_unterm_macro_invoc);
        return;

      case tok::comma:
        // These builtins take exactly one argument; diagnose the first extra
        // comma only, then keep scanning for the closing ')'.
        if (!SuppressDiagnostic) {
          PP.Diag(Tok.getLocation(), diag::err_too_many_args_in_macro_invoc);
          SuppressDiagnostic = true;
        }
        continue;

      case tok::l_paren:
        ++ParenDepth;
        // A '(' after the result was computed may be part of trailing junk;
        // let the default handling (break) skip it.  Before a result it is a
        // disallowed nested paren.
        if (Result.hasValue())
          break;
        if (!SuppressDiagnostic) {
          PP.Diag(Tok.getLocation(), diag::err_pp_nested_paren) << II;
          SuppressDiagnostic = true;
        }
        continue;

      case tok::r_paren:
        if (--ParenDepth > 0)
          continue;

        // The last ')' has been reached; return the value if one found or
        // a diagnostic and a dummy value.
        if (Result.hasValue()) {
          OS << Result.getValue();
          // For strict conformance to __has_cpp_attribute rules, use 'L'
          // suffix for dated literals.
          if (Result.getValue() > 1)
            OS << 'L';
        } else {
          OS << 0;
          if (!SuppressDiagnostic)
            PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc);
        }
        Tok.setKind(tok::numeric_constant);
        return;

      default: {
        // Parse the macro argument, if one not found so far.
        // A second argument token after the result was computed falls through
        // to the missing-')' diagnostic below.
        if (Result.hasValue())
          break;

        bool HasLexedNextToken = false;
        Result = Op(Tok, HasLexedNextToken);
        ResultTok = Tok;
        // Some callbacks (e.g. scoped attributes) look ahead one token; if
        // so, re-enter the switch on the token they already lexed.
        if (HasLexedNextToken)
          goto already_lexed;
        continue;
      }
    }

    // Diagnose missing ')'.
    if (!SuppressDiagnostic) {
      if (auto Diag = PP.Diag(Tok.getLocation(), diag::err_pp_expected_after)) {
        if (IdentifierInfo *LastII = ResultTok.getIdentifierInfo())
          Diag << LastII;
        else
          Diag << ResultTok.getKind();
        Diag << tok::r_paren << ResultTok.getLocation();
      }
      PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
      SuppressDiagnostic = true;
    }
  }
}

/// Helper function to return the IdentifierInfo structure of a Token
/// or generate a diagnostic if none available.
static IdentifierInfo *ExpectFeatureIdentifierInfo(Token &Tok,
                                                   Preprocessor &PP,
                                                   signed DiagID) {
  IdentifierInfo *II;
  // Annotation tokens carry no identifier; reject them explicitly.
  if (!Tok.isAnnotation() && (II = Tok.getIdentifierInfo()))
    return II;

  PP.Diag(Tok.getLocation(), DiagID);
  return nullptr;
}

/// Implements the __is_target_arch builtin macro.
static bool isTargetArch(const TargetInfo &TI, const IdentifierInfo *II) {
  // Build a bare "<arch>--" triple so Triple's parser normalizes the
  // user-written arch name for us.
  std::string ArchName = II->getName().lower() + "--";
  llvm::Triple Arch(ArchName);
  const llvm::Triple &TT = TI.getTriple();
  if (TT.isThumb()) {
    // arm matches thumb or thumbv7. armv7 matches thumbv7.
    if ((Arch.getSubArch() == llvm::Triple::NoSubArch ||
         Arch.getSubArch() == TT.getSubArch()) &&
        ((TT.getArch() == llvm::Triple::thumb &&
          Arch.getArch() == llvm::Triple::arm) ||
         (TT.getArch() == llvm::Triple::thumbeb &&
          Arch.getArch() == llvm::Triple::armeb)))
      return true;
  }
  // Check the parsed arch when it has no sub arch to allow Clang to
  // match thumb to thumbv7 but to prohibit matching thumbv6 to thumbv7.
  return (Arch.getSubArch() == llvm::Triple::NoSubArch ||
          Arch.getSubArch() == TT.getSubArch()) &&
         Arch.getArch() == TT.getArch();
}

/// Implements the __is_target_vendor builtin macro.
static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) {
  StringRef VendorName = TI.getTriple().getVendorName();
  // An absent vendor component is spelled "unknown" so users can test for it.
  if (VendorName.empty())
    VendorName = "unknown";
  return VendorName.equals_lower(II->getName());
}

/// Implements the __is_target_os builtin macro.
static bool isTargetOS(const TargetInfo &TI, const IdentifierInfo *II) {
  // Build an "unknown-unknown-<os>" triple so Triple's parser normalizes the
  // user-written OS name.
  std::string OSName =
      (llvm::Twine("unknown-unknown-") + II->getName().lower()).str();
  llvm::Triple OS(OSName);
  if (OS.getOS() == llvm::Triple::Darwin) {
    // Darwin matches macos, ios, etc.
    return TI.getTriple().isOSDarwin();
  }
  return TI.getTriple().getOS() == OS.getOS();
}

/// Implements the __is_target_environment builtin macro.
static bool isTargetEnvironment(const TargetInfo &TI,
                                const IdentifierInfo *II) {
  // "---<env>" parses as a triple whose only meaningful piece is the
  // environment component.
  std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str();
  llvm::Triple Env(EnvName);
  return TI.getTriple().getEnvironment() == Env.getEnvironment();
}

/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
  // Figure out which token this is.
+  IdentifierInfo *II = Tok.getIdentifierInfo(); +  assert(II && "Can't be a macro without id info!"); + +  // If this is an _Pragma or Microsoft __pragma directive, expand it, +  // invoke the pragma handler, then lex the token after it. +  if (II == Ident_Pragma) +    return Handle_Pragma(Tok); +  else if (II == Ident__pragma) // in non-MS mode this is null +    return HandleMicrosoft__pragma(Tok); + +  ++NumBuiltinMacroExpanded; + +  SmallString<128> TmpBuffer; +  llvm::raw_svector_ostream OS(TmpBuffer); + +  // Set up the return result. +  Tok.setIdentifierInfo(nullptr); +  Tok.clearFlag(Token::NeedsCleaning); +  bool IsAtStartOfLine = Tok.isAtStartOfLine(); +  bool HasLeadingSpace = Tok.hasLeadingSpace(); + +  if (II == Ident__LINE__) { +    // C99 6.10.8: "__LINE__: The presumed line number (within the current +    // source file) of the current source line (an integer constant)".  This can +    // be affected by #line. +    SourceLocation Loc = Tok.getLocation(); + +    // Advance to the location of the first _, this might not be the first byte +    // of the token if it starts with an escaped newline. +    Loc = AdvanceToTokenCharacter(Loc, 0); + +    // One wrinkle here is that GCC expands __LINE__ to location of the *end* of +    // a macro expansion.  This doesn't matter for object-like macros, but +    // can matter for a function-like macro that expands to contain __LINE__. +    // Skip down through expansion points until we find a file loc for the +    // end of the expansion history. +    Loc = SourceMgr.getExpansionRange(Loc).getEnd(); +    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc); + +    // __LINE__ expands to a simple numeric value. +    OS << (PLoc.isValid()? PLoc.getLine() : 1); +    Tok.setKind(tok::numeric_constant); +  } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__ || +             II == Ident__FILE_NAME__) { +    // C99 6.10.8: "__FILE__: The presumed name of the current source file (a +    // character string literal)". 
This can be affected by #line. +    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); + +    // __BASE_FILE__ is a GNU extension that returns the top of the presumed +    // #include stack instead of the current file. +    if (II == Ident__BASE_FILE__ && PLoc.isValid()) { +      SourceLocation NextLoc = PLoc.getIncludeLoc(); +      while (NextLoc.isValid()) { +        PLoc = SourceMgr.getPresumedLoc(NextLoc); +        if (PLoc.isInvalid()) +          break; + +        NextLoc = PLoc.getIncludeLoc(); +      } +    } + +    // Escape this filename.  Turn '\' -> '\\' '"' -> '\"' +    SmallString<128> FN; +    if (PLoc.isValid()) { +      // __FILE_NAME__ is a Clang-specific extension that expands to the +      // the last part of __FILE__. +      if (II == Ident__FILE_NAME__) { +        // Try to get the last path component, failing that return the original +        // presumed location. +        StringRef PLFileName = llvm::sys::path::filename(PLoc.getFilename()); +        if (PLFileName != "") +          FN += PLFileName; +        else +          FN += PLoc.getFilename(); +      } else { +        FN += PLoc.getFilename(); +      } +      Lexer::Stringify(FN); +      OS << '"' << FN << '"'; +    } +    Tok.setKind(tok::string_literal); +  } else if (II == Ident__DATE__) { +    Diag(Tok.getLocation(), diag::warn_pp_date_time); +    if (!DATELoc.isValid()) +      ComputeDATE_TIME(DATELoc, TIMELoc, *this); +    Tok.setKind(tok::string_literal); +    Tok.setLength(strlen("\"Mmm dd yyyy\"")); +    Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(), +                                                 Tok.getLocation(), +                                                 Tok.getLength())); +    return; +  } else if (II == Ident__TIME__) { +    Diag(Tok.getLocation(), diag::warn_pp_date_time); +    if (!TIMELoc.isValid()) +      ComputeDATE_TIME(DATELoc, TIMELoc, *this); +    Tok.setKind(tok::string_literal); +    
Tok.setLength(strlen("\"hh:mm:ss\"")); +    Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(), +                                                 Tok.getLocation(), +                                                 Tok.getLength())); +    return; +  } else if (II == Ident__INCLUDE_LEVEL__) { +    // Compute the presumed include depth of this token.  This can be affected +    // by GNU line markers. +    unsigned Depth = 0; + +    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); +    if (PLoc.isValid()) { +      PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc()); +      for (; PLoc.isValid(); ++Depth) +        PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc()); +    } + +    // __INCLUDE_LEVEL__ expands to a simple numeric value. +    OS << Depth; +    Tok.setKind(tok::numeric_constant); +  } else if (II == Ident__TIMESTAMP__) { +    Diag(Tok.getLocation(), diag::warn_pp_date_time); +    // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be +    // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. + +    // Get the file that we are lexing out of.  If we're currently lexing from +    // a macro, dig into the include stack. +    const FileEntry *CurFile = nullptr; +    PreprocessorLexer *TheLexer = getCurrentFileLexer(); + +    if (TheLexer) +      CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID()); + +    const char *Result; +    if (CurFile) { +      time_t TT = CurFile->getModificationTime(); +      struct tm *TM = localtime(&TT); +      Result = asctime(TM); +    } else { +      Result = "??? ??? ?? ??:??:?? ????\n"; +    } +    // Surround the string with " and strip the trailing newline. +    OS << '"' << StringRef(Result).drop_back() << '"'; +    Tok.setKind(tok::string_literal); +  } else if (II == Ident__COUNTER__) { +    // __COUNTER__ expands to a simple numeric value. 
+    OS << CounterValue++; +    Tok.setKind(tok::numeric_constant); +  } else if (II == Ident__has_feature) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                           diag::err_feature_check_malformed); +        return II && HasFeature(*this, II->getName()); +      }); +  } else if (II == Ident__has_extension) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                           diag::err_feature_check_malformed); +        return II && HasExtension(*this, II->getName()); +      }); +  } else if (II == Ident__has_builtin) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                           diag::err_feature_check_malformed); +        const LangOptions &LangOpts = getLangOpts(); +        if (!II) +          return false; +        else if (II->getBuiltinID() != 0) { +          switch (II->getBuiltinID()) { +          case Builtin::BI__builtin_operator_new: +          case Builtin::BI__builtin_operator_delete: +            // denotes date of behavior change to support calling arbitrary +            // usual allocation and deallocation functions. Required by libc++ +            return 201802; +          default: +            return true; +          } +          return true; +        } else if (II->getTokenID() != tok::identifier || +                   II->hasRevertedTokenIDToIdentifier()) { +          // Treat all keywords that introduce a custom syntax of the form +          // +          //   '__some_keyword' '(' [...] 
')' +          // +          // as being "builtin functions", even if the syntax isn't a valid +          // function call (for example, because the builtin takes a type +          // argument). +          if (II->getName().startswith("__builtin_") || +              II->getName().startswith("__is_") || +              II->getName().startswith("__has_")) +            return true; +          return llvm::StringSwitch<bool>(II->getName()) +              .Case("__array_rank", true) +              .Case("__array_extent", true) +              .Case("__reference_binds_to_temporary", true) +              .Case("__underlying_type", true) +              .Default(false); +        } else { +          return llvm::StringSwitch<bool>(II->getName()) +              // Report builtin templates as being builtins. +              .Case("__make_integer_seq", LangOpts.CPlusPlus) +              .Case("__type_pack_element", LangOpts.CPlusPlus) +              // Likewise for some builtin preprocessor macros. +              // FIXME: This is inconsistent; we usually suggest detecting +              // builtin macros via #ifdef. Don't add more cases here. +              .Case("__is_target_arch", true) +              .Case("__is_target_vendor", true) +              .Case("__is_target_os", true) +              .Case("__is_target_environment", true) +              .Default(false); +        } +      }); +  } else if (II == Ident__is_identifier) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [](Token &Tok, bool &HasLexedNextToken) -> int { +        return Tok.is(tok::identifier); +      }); +  } else if (II == Ident__has_attribute) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                           diag::err_feature_check_malformed); +        return II ? 
hasAttribute(AttrSyntax::GNU, nullptr, II, +                                 getTargetInfo(), getLangOpts()) : 0; +      }); +  } else if (II == Ident__has_declspec) { +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                           diag::err_feature_check_malformed); +        return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II, +                                 getTargetInfo(), getLangOpts()) : 0; +      }); +  } else if (II == Ident__has_cpp_attribute || +             II == Ident__has_c_attribute) { +    bool IsCXX = II == Ident__has_cpp_attribute; +    EvaluateFeatureLikeBuiltinMacro( +        OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int { +          IdentifierInfo *ScopeII = nullptr; +          IdentifierInfo *II = ExpectFeatureIdentifierInfo( +              Tok, *this, diag::err_feature_check_malformed); +          if (!II) +            return false; + +          // It is possible to receive a scope token.  Read the "::", if it is +          // available, and the subsequent identifier. +          LexUnexpandedToken(Tok); +          if (Tok.isNot(tok::coloncolon)) +            HasLexedNextToken = true; +          else { +            ScopeII = II; +            LexUnexpandedToken(Tok); +            II = ExpectFeatureIdentifierInfo(Tok, *this, +                                             diag::err_feature_check_malformed); +          } + +          AttrSyntax Syntax = IsCXX ? AttrSyntax::CXX : AttrSyntax::C; +          return II ? 
hasAttribute(Syntax, ScopeII, II, getTargetInfo(), +                                   getLangOpts()) +                    : 0; +        }); +  } else if (II == Ident__has_include || +             II == Ident__has_include_next) { +    // The argument to these two builtins should be a parenthesized +    // file name string literal using angle brackets (<>) or +    // double-quotes (""). +    bool Value; +    if (II == Ident__has_include) +      Value = EvaluateHasInclude(Tok, II, *this); +    else +      Value = EvaluateHasIncludeNext(Tok, II, *this); + +    if (Tok.isNot(tok::r_paren)) +      return; +    OS << (int)Value; +    Tok.setKind(tok::numeric_constant); +  } else if (II == Ident__has_warning) { +    // The argument should be a parenthesized string literal. +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        std::string WarningName; +        SourceLocation StrStartLoc = Tok.getLocation(); + +        HasLexedNextToken = Tok.is(tok::string_literal); +        if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'", +                                    /*AllowMacroExpansion=*/false)) +          return false; + +        // FIXME: Should we accept "-R..." flags here, or should that be +        // handled by a separate __has_remark? +        if (WarningName.size() < 3 || WarningName[0] != '-' || +            WarningName[1] != 'W') { +          Diag(StrStartLoc, diag::warn_has_warning_invalid_option); +          return false; +        } + +        // Finally, check if the warning flags maps to a diagnostic group. +        // We construct a SmallVector here to talk to getDiagnosticIDs(). +        // Although we don't use the result, this isn't a hot path, and not +        // worth special casing. 
+        SmallVector<diag::kind, 10> Diags; +        return !getDiagnostics().getDiagnosticIDs()-> +                getDiagnosticsInGroup(diag::Flavor::WarningOrError, +                                      WarningName.substr(2), Diags); +      }); +  } else if (II == Ident__building_module) { +    // The argument to this builtin should be an identifier. The +    // builtin evaluates to 1 when that identifier names the module we are +    // currently building. +    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, +      [this](Token &Tok, bool &HasLexedNextToken) -> int { +        IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, +                                       diag::err_expected_id_building_module); +        return getLangOpts().isCompilingModule() && II && +               (II->getName() == getLangOpts().CurrentModule); +      }); +  } else if (II == Ident__MODULE__) { +    // The current module as an identifier. +    OS << getLangOpts().CurrentModule; +    IdentifierInfo *ModuleII = getIdentifierInfo(getLangOpts().CurrentModule); +    Tok.setIdentifierInfo(ModuleII); +    Tok.setKind(ModuleII->getTokenID()); +  } else if (II == Ident__identifier) { +    SourceLocation Loc = Tok.getLocation(); + +    // We're expecting '__identifier' '(' identifier ')'. Try to recover +    // if the parens are missing. +    LexNonComment(Tok); +    if (Tok.isNot(tok::l_paren)) { +      // No '(', use end of last token. +      Diag(getLocForEndOfToken(Loc), diag::err_pp_expected_after) +        << II << tok::l_paren; +      // If the next token isn't valid as our argument, we can't recover. 
+      if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) +        Tok.setKind(tok::identifier); +      return; +    } + +    SourceLocation LParenLoc = Tok.getLocation(); +    LexNonComment(Tok); + +    if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) +      Tok.setKind(tok::identifier); +    else { +      Diag(Tok.getLocation(), diag::err_pp_identifier_arg_not_identifier) +        << Tok.getKind(); +      // Don't walk past anything that's not a real token. +      if (Tok.isOneOf(tok::eof, tok::eod) || Tok.isAnnotation()) +        return; +    } + +    // Discard the ')', preserving 'Tok' as our result. +    Token RParen; +    LexNonComment(RParen); +    if (RParen.isNot(tok::r_paren)) { +      Diag(getLocForEndOfToken(Tok.getLocation()), diag::err_pp_expected_after) +        << Tok.getKind() << tok::r_paren; +      Diag(LParenLoc, diag::note_matching) << tok::l_paren; +    } +    return; +  } else if (II == Ident__is_target_arch) { +    EvaluateFeatureLikeBuiltinMacro( +        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { +          IdentifierInfo *II = ExpectFeatureIdentifierInfo( +              Tok, *this, diag::err_feature_check_malformed); +          return II && isTargetArch(getTargetInfo(), II); +        }); +  } else if (II == Ident__is_target_vendor) { +    EvaluateFeatureLikeBuiltinMacro( +        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { +          IdentifierInfo *II = ExpectFeatureIdentifierInfo( +              Tok, *this, diag::err_feature_check_malformed); +          return II && isTargetVendor(getTargetInfo(), II); +        }); +  } else if (II == Ident__is_target_os) { +    EvaluateFeatureLikeBuiltinMacro( +        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { +          IdentifierInfo *II = ExpectFeatureIdentifierInfo( +              Tok, *this, diag::err_feature_check_malformed); +          return II && isTargetOS(getTargetInfo(), II); +        }); +  } 
else if (II == Ident__is_target_environment) {
    EvaluateFeatureLikeBuiltinMacro(
        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
          IdentifierInfo *II = ExpectFeatureIdentifierInfo(
              Tok, *this, diag::err_feature_check_malformed);
          return II && isTargetEnvironment(getTargetInfo(), II);
        });
  } else {
    llvm_unreachable("Unknown identifier!");
  }
  // Splice the computed expansion text back into 'Tok', preserving the
  // original token's start-of-line and leading-space flags.
  CreateString(OS.str(), Tok, Tok.getLocation(), Tok.getLocation());
  Tok.setFlagValue(Token::StartOfLine, IsAtStartOfLine);
  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
}

/// Mark \p MI as used. If the macro was previously unused and participates in
/// -Wunused-macros tracking, drop its definition location from the
/// warn-for-unused set so no "unused macro" diagnostic is emitted for it.
void Preprocessor::markMacroAsUsed(MacroInfo *MI) {
  // If the 'used' status changed, and the macro requires 'unused' warning,
  // remove its SourceLocation from the warn-for-unused-macro locations.
  if (MI->isWarnIfUnused() && !MI->isUsed())
    WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
  MI->setIsUsed(true);
}
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
new file mode 100644
index 000000000000..79953804b5d3
--- /dev/null
+++ b/clang/lib/Lex/Pragma.cpp
@@ -0,0 +1,1914 @@
//===- Pragma.cpp - Pragma registration and handling ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PragmaHandler/PragmaTable interfaces and implements
// pragma related methods of the Preprocessor class.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Pragma.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorLexer.h" +#include "clang/Lex/Token.h" +#include "clang/Lex/TokenLexer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <string> +#include <utility> +#include <vector> + +using namespace clang; + +// Out-of-line destructor to provide a home for the class. +PragmaHandler::~PragmaHandler() = default; + +//===----------------------------------------------------------------------===// +// EmptyPragmaHandler Implementation. 
+//===----------------------------------------------------------------------===// + +EmptyPragmaHandler::EmptyPragmaHandler(StringRef Name) : PragmaHandler(Name) {} + +void EmptyPragmaHandler::HandlePragma(Preprocessor &PP, +                                      PragmaIntroducer Introducer, +                                      Token &FirstToken) {} + +//===----------------------------------------------------------------------===// +// PragmaNamespace Implementation. +//===----------------------------------------------------------------------===// + +PragmaNamespace::~PragmaNamespace() { +  llvm::DeleteContainerSeconds(Handlers); +} + +/// FindHandler - Check to see if there is already a handler for the +/// specified name.  If not, return the handler for the null identifier if it +/// exists, otherwise return null.  If IgnoreNull is true (the default) then +/// the null handler isn't returned on failure to match. +PragmaHandler *PragmaNamespace::FindHandler(StringRef Name, +                                            bool IgnoreNull) const { +  if (PragmaHandler *Handler = Handlers.lookup(Name)) +    return Handler; +  return IgnoreNull ? nullptr : Handlers.lookup(StringRef()); +} + +void PragmaNamespace::AddPragma(PragmaHandler *Handler) { +  assert(!Handlers.lookup(Handler->getName()) && +         "A handler with this name is already registered in this namespace"); +  Handlers[Handler->getName()] = Handler; +} + +void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) { +  assert(Handlers.lookup(Handler->getName()) && +         "Handler not registered in this namespace"); +  Handlers.erase(Handler->getName()); +} + +void PragmaNamespace::HandlePragma(Preprocessor &PP, +                                   PragmaIntroducer Introducer, Token &Tok) { +  // Read the 'namespace' that the directive is in, e.g. STDC.  Do not macro +  // expand it, the user can have a STDC #define, that should not affect this. 
+  PP.LexUnexpandedToken(Tok); + +  // Get the handler for this token.  If there is no handler, ignore the pragma. +  PragmaHandler *Handler +    = FindHandler(Tok.getIdentifierInfo() ? Tok.getIdentifierInfo()->getName() +                                          : StringRef(), +                  /*IgnoreNull=*/false); +  if (!Handler) { +    PP.Diag(Tok, diag::warn_pragma_ignored); +    return; +  } + +  // Otherwise, pass it down. +  Handler->HandlePragma(PP, Introducer, Tok); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Pragma Directive Handling. +//===----------------------------------------------------------------------===// + +namespace { +// TokenCollector provides the option to collect tokens that were "read" +// and return them to the stream to be read later. +// Currently used when reading _Pragma/__pragma directives. +struct TokenCollector { +  Preprocessor &Self; +  bool Collect; +  SmallVector<Token, 3> Tokens; +  Token &Tok; + +  void lex() { +    if (Collect) +      Tokens.push_back(Tok); +    Self.Lex(Tok); +  } + +  void revert() { +    assert(Collect && "did not collect tokens"); +    assert(!Tokens.empty() && "collected unexpected number of tokens"); + +    // Push the ( "string" ) tokens into the token stream. +    auto Toks = std::make_unique<Token[]>(Tokens.size()); +    std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); +    Toks[Tokens.size() - 1] = Tok; +    Self.EnterTokenStream(std::move(Toks), Tokens.size(), +                          /*DisableMacroExpansion*/ true, +                          /*IsReinject*/ true); + +    // ... and return the pragma token unchanged. +    Tok = *Tokens.begin(); +  } +}; +} // namespace + +/// HandlePragmaDirective - The "\#pragma" directive has been parsed.  Lex the +/// rest of the pragma, passing it to the registered pragma handlers. 
void Preprocessor::HandlePragmaDirective(PragmaIntroducer Introducer) {
  if (Callbacks)
    Callbacks->PragmaDirective(Introducer.Loc, Introducer.Kind);

  if (!PragmasEnabled)
    return;

  ++NumPragma;

  // Invoke the first level of pragma handlers which reads the namespace id.
  Token Tok;
  PragmaHandlers->HandlePragma(*this, Introducer, Tok);

  // If the pragma handler didn't read the rest of the line, consume it now.
  if ((CurTokenLexer && CurTokenLexer->isParsingPreprocessorDirective())
   || (CurPPLexer && CurPPLexer->ParsingPreprocessorDirective))
    DiscardUntilEndOfDirective();
}

/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
/// return the first token after the directive.  The _Pragma token has just
/// been read into 'Tok'.
void Preprocessor::Handle_Pragma(Token &Tok) {
  // C11 6.10.3.4/3:
  //   all pragma unary operator expressions within [a completely
  //   macro-replaced preprocessing token sequence] are [...] processed [after
  //   rescanning is complete]
  //
  // This means that we execute _Pragma operators in two cases:
  //
  //  1) on token sequences that would otherwise be produced as the output of
  //     phase 4 of preprocessing, and
  //  2) on token sequences formed as the macro-replaced token sequence of a
  //     macro argument
  //
  // Case #2 appears to be a wording bug: only _Pragmas that would survive to
  // the end of phase 4 should actually be executed. Discussion on the WG14
  // mailing list suggests that a _Pragma operator is notionally checked early,
  // but only pragmas that survive to the end of phase 4 should be executed.
  //
  // In Case #2, we check the syntax now, but then put the tokens back into the
  // token stream for later consumption.

  // Tokens are only collected (for later reinjection via revert()) while
  // pre-expanding a macro argument; otherwise the collector lexes straight
  // through.
  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};

  // Remember the pragma token location.
  SourceLocation PragmaLoc = Tok.getLocation();

  // Read the '('.
  Toks.lex();
  if (Tok.isNot(tok::l_paren)) {
    Diag(PragmaLoc, diag::err__Pragma_malformed);
    return;
  }

  // Read the '"..."'.
  Toks.lex();
  if (!tok::isStringLiteral(Tok.getKind())) {
    Diag(PragmaLoc, diag::err__Pragma_malformed);
    // Skip bad tokens, and the ')', if present.
    if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eof))
      Lex(Tok);
    while (Tok.isNot(tok::r_paren) &&
           !Tok.isAtStartOfLine() &&
           Tok.isNot(tok::eof))
      Lex(Tok);
    if (Tok.is(tok::r_paren))
      Lex(Tok);
    return;
  }

  if (Tok.hasUDSuffix()) {
    Diag(Tok, diag::err_invalid_string_udl);
    // Skip this token, and the ')', if present.
    Lex(Tok);
    if (Tok.is(tok::r_paren))
      Lex(Tok);
    return;
  }

  // Remember the string.
  Token StrTok = Tok;

  // Read the ')'.
  Toks.lex();
  if (Tok.isNot(tok::r_paren)) {
    Diag(PragmaLoc, diag::err__Pragma_malformed);
    return;
  }

  // If we're expanding a macro argument, put the tokens back.
  if (InMacroArgPreExpansion) {
    Toks.revert();
    return;
  }

  SourceLocation RParenLoc = Tok.getLocation();
  std::string StrVal = getSpelling(StrTok);

  // The _Pragma is lexically sound.  Destringize according to C11 6.10.9.1:
  // "The string literal is destringized by deleting any encoding prefix,
  // deleting the leading and trailing double-quotes, replacing each escape
  // sequence \" by a double-quote, and replacing each escape sequence \\ by a
  // single backslash."
  if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
      (StrVal[0] == 'u' && StrVal[1] != '8'))
    StrVal.erase(StrVal.begin());
  else if (StrVal[0] == 'u')
    // 'u8' prefix: drop both characters.
    StrVal.erase(StrVal.begin(), StrVal.begin() + 2);

  if (StrVal[0] == 'R') {
    // FIXME: C++11 does not specify how to handle raw-string-literals here.
    // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
    assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
           "Invalid raw string token!");

    // Measure the length of the d-char-sequence.
    unsigned NumDChars = 0;
    while (StrVal[2 + NumDChars] != '(') {
      assert(NumDChars < (StrVal.size() - 5) / 2 &&
             "Invalid raw string token!");
      ++NumDChars;
    }
    assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');

    // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
    // parens below.
    StrVal.erase(0, 2 + NumDChars);
    StrVal.erase(StrVal.size() - 1 - NumDChars);
  } else {
    assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
           "Invalid string token!");

    // Remove escaped quotes and escapes, compacting the string in place.
    unsigned ResultPos = 1;
    for (size_t i = 1, e = StrVal.size() - 1; i != e; ++i) {
      // Skip escapes.  \\ -> '\' and \" -> '"'.
      if (StrVal[i] == '\\' && i + 1 < e &&
          (StrVal[i + 1] == '\\' || StrVal[i + 1] == '"'))
        ++i;
      StrVal[ResultPos++] = StrVal[i];
    }
    StrVal.erase(StrVal.begin() + ResultPos, StrVal.end() - 1);
  }

  // Remove the front quote, replacing it with a space, so that the pragma
  // contents appear to have a space before them.
  StrVal[0] = ' ';

  // Replace the terminating quote with a \n.
  StrVal[StrVal.size()-1] = '\n';

  // Plop the string (including the newline and trailing null) into a buffer
  // where we can lex it.
  Token TmpTok;
  TmpTok.startToken();
  CreateString(StrVal, TmpTok);
  SourceLocation TokLoc = TmpTok.getLocation();

  // Make and enter a lexer object so that we lex and expand the tokens just
  // like any others.
  Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
                                        StrVal.size(), *this);

  EnterSourceFileWithLexer(TL, nullptr);

  // With everything set up, lex this as a #pragma directive.
  HandlePragmaDirective({PIK__Pragma, PragmaLoc});

  // Finally, return whatever came after the pragma directive.
  return Lex(Tok);
}

/// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
/// is not enclosed within a string literal.
void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {
  // During macro pre-expansion, check the syntax now but put the tokens back
  // into the token stream for later consumption. Same as Handle_Pragma.
  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};

  // Remember the pragma token location.
  SourceLocation PragmaLoc = Tok.getLocation();

  // Read the '('.
  Toks.lex();
  if (Tok.isNot(tok::l_paren)) {
    Diag(PragmaLoc, diag::err__Pragma_malformed);
    return;
  }

  // Get the tokens enclosed within the __pragma(), as well as the final ')'.
  SmallVector<Token, 32> PragmaToks;
  int NumParens = 0;
  Toks.lex();
  while (Tok.isNot(tok::eof)) {
    PragmaToks.push_back(Tok);
    if (Tok.is(tok::l_paren))
      NumParens++;
    // 'NumParens-- == 0' both detects the matching outer ')' (at depth 0)
    // and decrements the depth for an inner ')' in a single expression.
    else if (Tok.is(tok::r_paren) && NumParens-- == 0)
      break;
    Toks.lex();
  }

  if (Tok.is(tok::eof)) {
    Diag(PragmaLoc, diag::err_unterminated___pragma);
    return;
  }

  // If we're expanding a macro argument, put the tokens back.
  if (InMacroArgPreExpansion) {
    Toks.revert();
    return;
  }

  PragmaToks.front().setFlag(Token::LeadingSpace);

  // Replace the ')' with an EOD to mark the end of the pragma.
  PragmaToks.back().setKind(tok::eod);

  Token *TokArray = new Token[PragmaToks.size()];
  std::copy(PragmaToks.begin(), PragmaToks.end(), TokArray);

  // Push the tokens onto the stack.
  EnterTokenStream(TokArray, PragmaToks.size(), true, true,
                   /*IsReinject*/ false);

  // With everything set up, lex this as a #pragma directive.
  HandlePragmaDirective({PIK___pragma, PragmaLoc});

  // Finally, return whatever came after the pragma directive.
  return Lex(Tok);
}

/// HandlePragmaOnce - Handle \#pragma once.  OnceTok is the 'once'.
void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
  // Don't honor the 'once' when handling the primary source file, unless
  // this is a prefix to a TU, which indicates we're generating a PCH file, or
  // when the main file is a header (e.g. when -xc-header is provided on the
  // commandline).
  if (isInPrimaryFile() && TUKind != TU_Prefix && !getLangOpts().IsHeaderFile) {
    Diag(OnceTok, diag::pp_pragma_once_in_main_file);
    return;
  }

  // Get the current file lexer we're looking at.  Ignore _Pragma 'files' etc.
  // Mark the file as a once-only file now.
  HeaderInfo.MarkFileIncludeOnce(getCurrentFileLexer()->getFileEntry());
}

/// Handle \#pragma mark by discarding the remainder of the line.
void Preprocessor::HandlePragmaMark() {
  assert(CurPPLexer && "No current lexer?");
  CurLexer->ReadToEndOfLine();
}

/// HandlePragmaPoison - Handle \#pragma GCC poison.  PoisonTok is the 'poison'.
void Preprocessor::HandlePragmaPoison() {
  Token Tok;

  while (true) {
    // Read the next token to poison.  While doing this, pretend that we are
    // skipping while reading the identifier to poison.
    // This avoids errors on code like:
    //   #pragma GCC poison X
    //   #pragma GCC poison X
    if (CurPPLexer) CurPPLexer->LexingRawMode = true;
    LexUnexpandedToken(Tok);
    if (CurPPLexer) CurPPLexer->LexingRawMode = false;

    // If we reached the end of line, we're done.
    if (Tok.is(tok::eod)) return;

    // Can only poison identifiers.
    if (Tok.isNot(tok::raw_identifier)) {
      Diag(Tok, diag::err_pp_invalid_poison);
      return;
    }

    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    IdentifierInfo *II = LookUpIdentifierInfo(Tok);

    // Already poisoned.
    if (II->isPoisoned()) continue;

    // If this is a macro identifier, emit a warning.
    if (isMacroDefined(II))
      Diag(Tok, diag::pp_poisoning_existing_macro);

    // Finally, poison it!
    II->setIsPoisoned();
    if (II->isFromAST())
      II->setChangedSinceDeserialization();
  }
}

/// HandlePragmaSystemHeader - Implement \#pragma GCC system_header.  We know
/// that the whole directive has been parsed.
void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
  if (isInPrimaryFile()) {
    Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file);
    return;
  }

  // Get the current file lexer we're looking at.  Ignore _Pragma 'files' etc.
  PreprocessorLexer *TheLexer = getCurrentFileLexer();

  // Mark the file as a system header.
  HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry());

  PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation());
  if (PLoc.isInvalid())
    return;

  unsigned FilenameID = SourceMgr.getLineTableFilenameID(PLoc.getFilename());

  // Notify the client, if desired, that we are in a new source file.
  if (Callbacks)
    Callbacks->FileChanged(SysHeaderTok.getLocation(),
                           PPCallbacks::SystemHeaderPragma, SrcMgr::C_System);

  // Emit a line marker.  This will change any source locations from this point
  // forward to realize they are in a system header.
  // Create a line note with this information.
  SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine() + 1,
                        FilenameID, /*IsEntry=*/false, /*IsExit=*/false,
                        SrcMgr::C_System);
}

/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
  Token FilenameTok;
  if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false))
    return;

  // If the next token wasn't a header-name, diagnose the error.
  if (FilenameTok.isNot(tok::header_name)) {
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
    return;
  }

  // Reserve a buffer to get the spelling.
  SmallString<128> FilenameBuffer;
  bool Invalid = false;
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
  if (Invalid)
    return;

  bool isAngled =
    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
  // error.
  if (Filename.empty())
    return;

  // Search include directories for this file.
  const DirectoryLookup *CurDir;
  Optional<FileEntryRef> File =
      LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr,
                 nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr);
  if (!File) {
    if (!SuppressIncludeNotFoundError)
      Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
    return;
  }

  const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry();

  // If this file is older than the file it depends on, emit a diagnostic.
  if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
    // Lex tokens at the end of the message and include them in the message.
    std::string Message;
    Lex(DependencyTok);
    while (DependencyTok.isNot(tok::eod)) {
      Message += getSpelling(DependencyTok) + " ";
      Lex(DependencyTok);
    }

    // Remove the trailing ' ' if present.
    if (!Message.empty())
      Message.erase(Message.end()-1);
    Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message;
  }
}

/// ParsePragmaPushOrPopMacro - Handle parsing of pragma push_macro/pop_macro.
/// Return the IdentifierInfo* associated with the macro to push or pop.
/// Returns null (after diagnosing) on any syntax error.
IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
  // Remember the pragma token location.
  Token PragmaTok = Tok;

  // Read the '('.
  Lex(Tok);
  if (Tok.isNot(tok::l_paren)) {
    Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
      << getSpelling(PragmaTok);
    return nullptr;
  }

  // Read the macro name string.
  Lex(Tok);
  if (Tok.isNot(tok::string_literal)) {
    Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
      << getSpelling(PragmaTok);
    return nullptr;
  }

  if (Tok.hasUDSuffix()) {
    Diag(Tok, diag::err_invalid_string_udl);
    return nullptr;
  }

  // Remember the macro string.
  std::string StrVal = getSpelling(Tok);

  // Read the ')'.
  Lex(Tok);
  if (Tok.isNot(tok::r_paren)) {
    Diag(PragmaTok.getLocation(), diag::err_pragma_push_pop_macro_malformed)
      << getSpelling(PragmaTok);
    return nullptr;
  }

  assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
         "Invalid string token!");

  // Create a Token from the string (stripping the surrounding quotes).
  Token MacroTok;
  MacroTok.startToken();
  MacroTok.setKind(tok::raw_identifier);
  CreateString(StringRef(&StrVal[1], StrVal.size() - 2), MacroTok);

  // Get the IdentifierInfo of MacroToPushTok.
  return LookUpIdentifierInfo(MacroTok);
}

/// Handle \#pragma push_macro.
///
/// The syntax is:
/// \code
///   #pragma push_macro("macro")
/// \endcode
void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) {
  // Parse the pragma directive and get the macro IdentifierInfo*.
  IdentifierInfo *IdentInfo = ParsePragmaPushOrPopMacro(PushMacroTok);
  if (!IdentInfo) return;

  // Get the MacroInfo associated with IdentInfo.
  MacroInfo *MI = getMacroInfo(IdentInfo);

  if (MI) {
    // Allow the original MacroInfo to be redefined later.
    MI->setIsAllowRedefinitionsWithoutWarning(true);
  }

  // Push the cloned MacroInfo so we can retrieve it later. Note that MI may
  // be null when the macro is not currently defined; a matching pop_macro
  // will then restore the "undefined" state.
  PragmaPushMacroInfo[IdentInfo].push_back(MI);
}

/// Handle \#pragma pop_macro.
///
/// The syntax is:
/// \code
///   #pragma pop_macro("macro")
/// \endcode
void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) {
  SourceLocation MessageLoc = PopMacroTok.getLocation();

  // Parse the pragma directive and get the macro IdentifierInfo*.
  IdentifierInfo *IdentInfo = ParsePragmaPushOrPopMacro(PopMacroTok);
  if (!IdentInfo) return;

  // Find the vector<MacroInfo*> associated with the macro.
  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>::iterator iter =
    PragmaPushMacroInfo.find(IdentInfo);
  if (iter != PragmaPushMacroInfo.end()) {
    // Forget the MacroInfo currently associated with IdentInfo.
    if (MacroInfo *MI = getMacroInfo(IdentInfo)) {
      if (MI->isWarnIfUnused())
        WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
      appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc));
    }

    // Get the MacroInfo we want to reinstall. A null entry means the macro
    // was undefined at push time, so only the undef above takes effect.
    MacroInfo *MacroToReInstall = iter->second.back();

    if (MacroToReInstall)
      // Reinstall the previously pushed macro.
      appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc);

    // Pop PragmaPushMacroInfo stack.
    iter->second.pop_back();
    if (iter->second.empty())
      PragmaPushMacroInfo.erase(iter);
  } else {
    Diag(MessageLoc, diag::warn_pragma_pop_macro_no_push)
      << IdentInfo->getName();
  }
}

/// Handle Microsoft's \#pragma include_alias("source", "replacement").
void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) {
  // We will either get a quoted filename or a bracketed filename, and we
  // have to track which we got.  The first filename is the source name,
  // and the second name is the mapped filename.  If the first is quoted,
  // the second must be as well (cannot mix and match quotes and brackets).

  // Get the open paren
  Lex(Tok);
  if (Tok.isNot(tok::l_paren)) {
    Diag(Tok, diag::warn_pragma_include_alias_expected) << "(";
    return;
  }

  // We expect either a quoted string literal, or a bracketed name
  Token SourceFilenameTok;
  if (LexHeaderName(SourceFilenameTok))
    return;

  StringRef SourceFileName;
  SmallString<128> FileNameBuffer;
  if (SourceFilenameTok.is(tok::header_name)) {
    SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer);
  } else {
    Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
    return;
  }
  // Reuse the buffer for the second filename below.
  FileNameBuffer.clear();

  // Now we expect a comma, followed by another include name
  Lex(Tok);
  if (Tok.isNot(tok::comma)) {
    Diag(Tok, diag::warn_pragma_include_alias_expected) << ",";
    return;
  }

  Token ReplaceFilenameTok;
  if (LexHeaderName(ReplaceFilenameTok))
    return;

  StringRef ReplaceFileName;
  if (ReplaceFilenameTok.is(tok::header_name)) {
    ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer);
  } else {
    Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
    return;
  }

  // Finally, we expect the closing paren
  Lex(Tok);
  if (Tok.isNot(tok::r_paren)) {
    Diag(Tok, diag::warn_pragma_include_alias_expected) << ")";
    return;
  }

  // Now that we have the source and target filenames, we need to make sure
  // they're both of the same type (angled vs non-angled)
  StringRef OriginalSource = SourceFileName;

  bool SourceIsAngled =
    GetIncludeFilenameSpelling(SourceFilenameTok.getLocation(),
                                SourceFileName);
  bool ReplaceIsAngled =
    GetIncludeFilenameSpelling(ReplaceFilenameTok.getLocation(),
                                ReplaceFileName);
  if (!SourceFileName.empty() && !ReplaceFileName.empty() &&
      (SourceIsAngled != ReplaceIsAngled)) {
    unsigned int DiagID;
    if (SourceIsAngled)
      DiagID = diag::warn_pragma_include_alias_mismatch_angle;
    else
      DiagID = diag::warn_pragma_include_alias_mismatch_quote;

    Diag(SourceFilenameTok.getLocation(), DiagID)
      << SourceFileName
      << ReplaceFileName;

    return;
  }

  // Now we can let the include handler know about this mapping
  getHeaderSearchInfo().AddIncludeAlias(OriginalSource, ReplaceFileName);
}

// Lex a component of a module name: either an identifier or a string literal;
// for components that can be expressed both ways, the two forms are equivalent.
// Returns true (after diagnosing) on error.
static bool LexModuleNameComponent(
    Preprocessor &PP, Token &Tok,
    std::pair<IdentifierInfo *, SourceLocation> &ModuleNameComponent,
    bool First) {
  PP.LexUnexpandedToken(Tok);
  if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
    StringLiteralParser Literal(Tok, PP);
    if (Literal.hadError)
      return true;
    ModuleNameComponent = std::make_pair(
        PP.getIdentifierInfo(Literal.GetString()), Tok.getLocation());
  } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo()) {
    ModuleNameComponent =
        std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation());
  } else {
    PP.Diag(Tok.getLocation(), diag::err_pp_expected_module_name) << First;
    return true;
  }
  return false;
}

// Lex a dotted module name ("a.b.c") into ModuleName; returns true on error.
// On success, Tok holds the first token after the final component.
static bool LexModuleName(
    Preprocessor &PP, Token &Tok,
    llvm::SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>>
        &ModuleName) {
  while (true) {
    std::pair<IdentifierInfo*, SourceLocation> NameComponent;
    if (LexModuleNameComponent(PP, Tok, NameComponent, ModuleName.empty()))
      return true;
    ModuleName.push_back(NameComponent);

    PP.LexUnexpandedToken(Tok);
    if (Tok.isNot(tok::period))
      return false;
  }
}

void Preprocessor::HandlePragmaModuleBuild(Token &Tok) {
  SourceLocation Loc = Tok.getLocation();

  std::pair<IdentifierInfo *, SourceLocation> ModuleNameLoc;
  if (LexModuleNameComponent(*this, Tok, ModuleNameLoc, true))
    return;
  IdentifierInfo *ModuleName = ModuleNameLoc.first;

  LexUnexpandedToken(Tok);
  if (Tok.isNot(tok::eod)) {
    Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
    DiscardUntilEndOfDirective();
  }

  CurLexer->LexingRawMode = true;

  auto TryConsumeIdentifier = [&](StringRef Ident) -> bool {
    if (Tok.getKind() != tok::raw_identifier ||
        Tok.getRawIdentifier() != Ident)
      return false;
    CurLexer->Lex(Tok);
    return true;
  };

  // Scan forward looking for the end of the module.
  const char *Start = CurLexer->getBufferLocation();
  const char *End = nullptr;
  unsigned NestingLevel = 1;
  while (true) {
    End = CurLexer->getBufferLocation();
    CurLexer->Lex(Tok);

    if (Tok.is(tok::eof)) {
      Diag(Loc, diag::err_pp_module_build_missing_end);
      break;
    }

    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine()) {
      // Token was part of module; keep going.
      continue;
    }

    // We hit something directive-shaped; check to see if this is the end
    // of the module build.
    CurLexer->ParsingPreprocessorDirective = true;
    CurLexer->Lex(Tok);
    if (TryConsumeIdentifier("pragma") && TryConsumeIdentifier("clang") &&
        TryConsumeIdentifier("module")) {
      if (TryConsumeIdentifier("build"))
        // #pragma clang module build -> entering a nested module build.
        ++NestingLevel;
      else if (TryConsumeIdentifier("endbuild")) {
        // #pragma clang module endbuild -> leaving a module build.
        if (--NestingLevel == 0)
          break;
      }
      // We should either be looking at the EOD or more of the current directive
      // preceding the EOD. Either way we can ignore this token and keep going.
      assert(Tok.getKind() != tok::eof && "missing EOD before EOF");
    }
  }

  CurLexer->LexingRawMode = false;

  // Load the extracted text as a preprocessed module.
+  assert(CurLexer->getBuffer().begin() <= Start && +         Start <= CurLexer->getBuffer().end() && +         CurLexer->getBuffer().begin() <= End && +         End <= CurLexer->getBuffer().end() && +         "module source range not contained within same file buffer"); +  TheModuleLoader.loadModuleFromSource(Loc, ModuleName->getName(), +                                       StringRef(Start, End - Start)); +} + +void Preprocessor::HandlePragmaHdrstop(Token &Tok) { +  Lex(Tok); +  if (Tok.is(tok::l_paren)) { +    Diag(Tok.getLocation(), diag::warn_pp_hdrstop_filename_ignored); + +    std::string FileName; +    if (!LexStringLiteral(Tok, FileName, "pragma hdrstop", false)) +      return; + +    if (Tok.isNot(tok::r_paren)) { +      Diag(Tok, diag::err_expected) << tok::r_paren; +      return; +    } +    Lex(Tok); +  } +  if (Tok.isNot(tok::eod)) +    Diag(Tok.getLocation(), diag::ext_pp_extra_tokens_at_eol) +        << "pragma hdrstop"; + +  if (creatingPCHWithPragmaHdrStop() && +      SourceMgr.isInMainFile(Tok.getLocation())) { +    assert(CurLexer && "no lexer for #pragma hdrstop processing"); +    Token &Result = Tok; +    Result.startToken(); +    CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); +    CurLexer->cutOffLexing(); +  } +  if (usingPCHWithPragmaHdrStop()) +    SkippingUntilPragmaHdrStop = false; +} + +/// AddPragmaHandler - Add the specified pragma handler to the preprocessor. +/// If 'Namespace' is non-null, then it is a token required to exist on the +/// pragma line before the pragma string starts, e.g. "STDC" or "GCC". +void Preprocessor::AddPragmaHandler(StringRef Namespace, +                                    PragmaHandler *Handler) { +  PragmaNamespace *InsertNS = PragmaHandlers.get(); + +  // If this is specified to be in a namespace, step down into it. 
+  if (!Namespace.empty()) { +    // If there is already a pragma handler with the name of this namespace, +    // we either have an error (directive with the same name as a namespace) or +    // we already have the namespace to insert into. +    if (PragmaHandler *Existing = PragmaHandlers->FindHandler(Namespace)) { +      InsertNS = Existing->getIfNamespace(); +      assert(InsertNS != nullptr && "Cannot have a pragma namespace and pragma" +             " handler with the same name!"); +    } else { +      // Otherwise, this namespace doesn't exist yet, create and insert the +      // handler for it. +      InsertNS = new PragmaNamespace(Namespace); +      PragmaHandlers->AddPragma(InsertNS); +    } +  } + +  // Check to make sure we don't already have a pragma for this identifier. +  assert(!InsertNS->FindHandler(Handler->getName()) && +         "Pragma handler already exists for this identifier!"); +  InsertNS->AddPragma(Handler); +} + +/// RemovePragmaHandler - Remove the specific pragma handler from the +/// preprocessor. If \arg Namespace is non-null, then it should be the +/// namespace that \arg Handler was added to. It is an error to remove +/// a handler that has not been registered. +void Preprocessor::RemovePragmaHandler(StringRef Namespace, +                                       PragmaHandler *Handler) { +  PragmaNamespace *NS = PragmaHandlers.get(); + +  // If this is specified to be in a namespace, step down into it. +  if (!Namespace.empty()) { +    PragmaHandler *Existing = PragmaHandlers->FindHandler(Namespace); +    assert(Existing && "Namespace containing handler does not exist!"); + +    NS = Existing->getIfNamespace(); +    assert(NS && "Invalid namespace, registered as a regular pragma handler!"); +  } + +  NS->RemovePragmaHandler(Handler); + +  // If this is a non-default namespace and it is now empty, remove it. 
+  if (NS != PragmaHandlers.get() && NS->IsEmpty()) { +    PragmaHandlers->RemovePragmaHandler(NS); +    delete NS; +  } +} + +bool Preprocessor::LexOnOffSwitch(tok::OnOffSwitch &Result) { +  Token Tok; +  LexUnexpandedToken(Tok); + +  if (Tok.isNot(tok::identifier)) { +    Diag(Tok, diag::ext_on_off_switch_syntax); +    return true; +  } +  IdentifierInfo *II = Tok.getIdentifierInfo(); +  if (II->isStr("ON")) +    Result = tok::OOS_ON; +  else if (II->isStr("OFF")) +    Result = tok::OOS_OFF; +  else if (II->isStr("DEFAULT")) +    Result = tok::OOS_DEFAULT; +  else { +    Diag(Tok, diag::ext_on_off_switch_syntax); +    return true; +  } + +  // Verify that this is followed by EOD. +  LexUnexpandedToken(Tok); +  if (Tok.isNot(tok::eod)) +    Diag(Tok, diag::ext_pragma_syntax_eod); +  return false; +} + +namespace { + +/// PragmaOnceHandler - "\#pragma once" marks the file as atomically included. +struct PragmaOnceHandler : public PragmaHandler { +  PragmaOnceHandler() : PragmaHandler("once") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &OnceTok) override { +    PP.CheckEndOfDirective("pragma once"); +    PP.HandlePragmaOnce(OnceTok); +  } +}; + +/// PragmaMarkHandler - "\#pragma mark ..." is ignored by the compiler, and the +/// rest of the line is not lexed. +struct PragmaMarkHandler : public PragmaHandler { +  PragmaMarkHandler() : PragmaHandler("mark") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &MarkTok) override { +    PP.HandlePragmaMark(); +  } +}; + +/// PragmaPoisonHandler - "\#pragma poison x" marks x as not usable. 
struct PragmaPoisonHandler : public PragmaHandler {
  PragmaPoisonHandler() : PragmaHandler("poison") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &PoisonTok) override {
    PP.HandlePragmaPoison();
  }
};

/// PragmaSystemHeaderHandler - "\#pragma system_header" marks the current file
/// as a system header, which silences warnings in it.
struct PragmaSystemHeaderHandler : public PragmaHandler {
  PragmaSystemHeaderHandler() : PragmaHandler("system_header") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &SHToken) override {
    PP.HandlePragmaSystemHeader(SHToken);
    PP.CheckEndOfDirective("pragma");
  }
};

/// PragmaDependencyHandler - "\#pragma dependency file ..." delegates to the
/// preprocessor's dependency handling.
struct PragmaDependencyHandler : public PragmaHandler {
  PragmaDependencyHandler() : PragmaHandler("dependency") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &DepToken) override {
    PP.HandlePragmaDependency(DepToken);
  }
};

/// PragmaDebugHandler - "\#pragma clang __debug <command>" implements a set
/// of compiler-debugging commands (deliberate crashes, dumps, etc.).
struct PragmaDebugHandler : public PragmaHandler {
  PragmaDebugHandler() : PragmaHandler("__debug") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &DebugToken) override {
    Token Tok;
    PP.LexUnexpandedToken(Tok);
    if (Tok.isNot(tok::identifier)) {
      PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
      return;
    }
    IdentifierInfo *II = Tok.getIdentifierInfo();

    if (II->isStr("assert")) {
      // Deliberately trip an assertion (in asserts builds).
      llvm_unreachable("This is an assertion!");
    } else if (II->isStr("crash")) {
      // Deliberately crash via a trap instruction.
      LLVM_BUILTIN_TRAP;
    } else if (II->isStr("parser_crash")) {
      // Inject an annotation token that makes the parser crash when reached.
      Token Crasher;
      Crasher.startToken();
      Crasher.setKind(tok::annot_pragma_parser_crash);
      Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
      PP.EnterToken(Crasher, /*IsReinject*/false);
    } else if (II->isStr("dump")) {
      // "dump <identifier>": inject an annotation carrying the identifier;
      // the parser acts on it later.
      Token Identifier;
      PP.LexUnexpandedToken(Identifier);
      if (auto *DumpII = Identifier.getIdentifierInfo()) {
        Token DumpAnnot;
        DumpAnnot.startToken();
        DumpAnnot.setKind(tok::annot_pragma_dump);
        DumpAnnot.setAnnotationRange(
            SourceRange(Tok.getLocation(), Identifier.getLocation()));
        DumpAnnot.setAnnotationValue(DumpII);
        PP.DiscardUntilEndOfDirective();
        PP.EnterToken(DumpAnnot, /*IsReinject*/false);
      } else {
        PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument)
            << II->getName();
      }
    } else if (II->isStr("diag_mapping")) {
      // Dump all diagnostic mappings, or just the one named by the optional
      // string-literal argument.
      Token DiagName;
      PP.LexUnexpandedToken(DiagName);
      if (DiagName.is(tok::eod))
        PP.getDiagnostics().dump();
      else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) {
        StringLiteralParser Literal(DiagName, PP);
        if (Literal.hadError)
          return;
        PP.getDiagnostics().dump(Literal.GetString());
      } else {
        PP.Diag(DiagName, diag::warn_pragma_debug_missing_argument)
            << II->getName();
      }
    } else if (II->isStr("llvm_fatal_error")) {
      llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
    } else if (II->isStr("llvm_unreachable")) {
      llvm_unreachable("#pragma clang __debug llvm_unreachable");
    } else if (II->isStr("macro")) {
      // Dump the macro information for the named identifier.
      Token MacroName;
      PP.LexUnexpandedToken(MacroName);
      auto *MacroII = MacroName.getIdentifierInfo();
      if (MacroII)
        PP.dumpMacroInfo(MacroII);
      else
        PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument)
            << II->getName();
    } else if (II->isStr("module_map")) {
      // Resolve a dotted module name against the module map and dump it.
      llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
          ModuleName;
      if (LexModuleName(PP, Tok, ModuleName))
        return;
      ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap();
      Module *M = nullptr;
      for (auto IIAndLoc : ModuleName) {
        M = MM.lookupModuleQualified(IIAndLoc.first->getName(), M);
        if (!M) {
          PP.Diag(IIAndLoc.second, diag::warn_pragma_debug_unknown_module)
              << IIAndLoc.first;
          return;
        }
      }
      M->dump();
    } else if (II->isStr("overflow_stack")) {
      DebugOverflowStack();
    } else if (II->isStr("handle_crash")) {
      llvm::CrashRecoveryContext *CRC =llvm::CrashRecoveryContext::GetCurrent();
      if (CRC)
        CRC->HandleCrash();
    } else if (II->isStr("captured")) {
      HandleCaptured(PP);
    } else {
      PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)
        << II->getName();
    }

    // Notify callbacks regardless of which command ran.
    PPCallbacks *Callbacks = PP.getPPCallbacks();
    if (Callbacks)
      Callbacks->PragmaDebug(Tok.getLocation(), II->getName());
  }

  // "captured": inject an annot_pragma_captured token for the parser.
  void HandleCaptured(Preprocessor &PP) {
    Token Tok;
    PP.LexUnexpandedToken(Tok);

    if (Tok.isNot(tok::eod)) {
      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol)
        << "pragma clang __debug captured";
      return;
    }

    SourceLocation NameLoc = Tok.getLocation();
    MutableArrayRef<Token> Toks(
        PP.getPreprocessorAllocator().Allocate<Token>(1), 1);
    Toks[0].startToken();
    Toks[0].setKind(tok::annot_pragma_captured);
    Toks[0].setLocation(NameLoc);

    PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true,
                        /*IsReinject=*/false);
  }

// Disable MSVC warning about runtime stack overflow.
#ifdef _MSC_VER
    #pragma warning(disable : 4717)
#endif
  // Recurse unboundedly (via a volatile pointer so the call cannot be
  // optimized into a loop) to deliberately overflow the stack.
  static void DebugOverflowStack(void (*P)() = nullptr) {
    void (*volatile Self)(void(*P)()) = DebugOverflowStack;
    Self(reinterpret_cast<void(*)()>(Self));
  }
#ifdef _MSC_VER
    #pragma warning(default : 4717)
#endif
};

/// PragmaDiagnosticHandler - e.g.
'\#pragma GCC diagnostic ignored "-Wformat"'
struct PragmaDiagnosticHandler : public PragmaHandler {
private:
  // Namespace the handler was registered under ("GCC"/"clang"), forwarded to
  // the PPCallbacks so clients can tell the two spellings apart.
  const char *Namespace;

public:
  explicit PragmaDiagnosticHandler(const char *NS)
      : PragmaHandler("diagnostic"), Namespace(NS) {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &DiagToken) override {
    SourceLocation DiagLoc = DiagToken.getLocation();
    Token Tok;
    PP.LexUnexpandedToken(Tok);
    if (Tok.isNot(tok::identifier)) {
      PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
      return;
    }
    IdentifierInfo *II = Tok.getIdentifierInfo();
    PPCallbacks *Callbacks = PP.getPPCallbacks();

    // push/pop manage the diagnostic-mapping stack and take no argument.
    if (II->isStr("pop")) {
      if (!PP.getDiagnostics().popMappings(DiagLoc))
        PP.Diag(Tok, diag::warn_pragma_diagnostic_cannot_pop);
      else if (Callbacks)
        Callbacks->PragmaDiagnosticPop(DiagLoc, Namespace);
      return;
    } else if (II->isStr("push")) {
      PP.getDiagnostics().pushMappings(DiagLoc);
      if (Callbacks)
        Callbacks->PragmaDiagnosticPush(DiagLoc, Namespace);
      return;
    }

    // Otherwise the identifier names a severity to apply to a warning group.
    diag::Severity SV = llvm::StringSwitch<diag::Severity>(II->getName())
                            .Case("ignored", diag::Severity::Ignored)
                            .Case("warning", diag::Severity::Warning)
                            .Case("error", diag::Severity::Error)
                            .Case("fatal", diag::Severity::Fatal)
                            .Default(diag::Severity());

    if (SV == diag::Severity()) {
      PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);
      return;
    }

    PP.LexUnexpandedToken(Tok);
    SourceLocation StringLoc = Tok.getLocation();

    std::string WarningName;
    if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic",
                                   /*AllowMacroExpansion=*/false))
      return;

    if (Tok.isNot(tok::eod)) {
      PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
      return;
    }

    // The option must look like "-W<group>" or "-R<group>".
    if (WarningName.size() < 3 || WarningName[0] != '-' ||
        (WarningName[1] != 'W' && WarningName[1] != 'R')) {
      PP.Diag(StringLoc, diag::warn_pragma_diagnostic_invalid_option);
      return;
    }

    diag::Flavor Flavor = WarningName[1] == 'W' ? diag::Flavor::WarningOrError
                                                : diag::Flavor::Remark;
    StringRef Group = StringRef(WarningName).substr(2);
    bool unknownDiag = false;
    if (Group == "everything") {
      // Special handling for pragma clang diagnostic ... "-Weverything".
      // There is no formal group named "everything", so there has to be a
      // special case for it.
      PP.getDiagnostics().setSeverityForAll(Flavor, SV, DiagLoc);
    } else
      unknownDiag = PP.getDiagnostics().setSeverityForGroup(Flavor, Group, SV,
                                                            DiagLoc);
    if (unknownDiag)
      PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning)
        << WarningName;
    else if (Callbacks)
      Callbacks->PragmaDiagnostic(DiagLoc, Namespace, SV, WarningName);
  }
};

/// "\#pragma hdrstop [<header-name-string>]"
struct PragmaHdrstopHandler : public PragmaHandler {
  PragmaHdrstopHandler() : PragmaHandler("hdrstop") {}
  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &DepToken) override {
    PP.HandlePragmaHdrstop(DepToken);
  }
};

/// "\#pragma warning(...)".  MSVC's diagnostics do not map cleanly to clang's
/// diagnostics, so we don't really implement this pragma.  We parse it and
/// ignore it to avoid -Wunknown-pragma warnings.
struct PragmaWarningHandler : public PragmaHandler {
  PragmaWarningHandler() : PragmaHandler("warning") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &Tok) override {
    // Parse things like:
    // warning(push, 1)
    // warning(pop)
    // warning(disable : 1 2 3 ; error : 4 5 6 ; suppress : 7 8 9)
    SourceLocation DiagLoc = Tok.getLocation();
    PPCallbacks *Callbacks = PP.getPPCallbacks();

    PP.Lex(Tok);
    if (Tok.isNot(tok::l_paren)) {
      PP.Diag(Tok, diag::warn_pragma_warning_expected) << "(";
      return;
    }

    PP.Lex(Tok);
    IdentifierInfo *II = Tok.getIdentifierInfo();

    if (II && II->isStr("push")) {
      // #pragma warning( push[ ,n ] )
      // Level stays -1 when no level is given.
      int Level = -1;
      PP.Lex(Tok);
      if (Tok.is(tok::comma)) {
        PP.Lex(Tok);
        uint64_t Value;
        if (Tok.is(tok::numeric_constant) &&
            PP.parseSimpleIntegerLiteral(Tok, Value))
          Level = int(Value);
        if (Level < 0 || Level > 4) {
          PP.Diag(Tok, diag::warn_pragma_warning_push_level);
          return;
        }
      }
      if (Callbacks)
        Callbacks->PragmaWarningPush(DiagLoc, Level);
    } else if (II && II->isStr("pop")) {
      // #pragma warning( pop )
      PP.Lex(Tok);
      if (Callbacks)
        Callbacks->PragmaWarningPop(DiagLoc);
    } else {
      // #pragma warning( warning-specifier : warning-number-list
      //                  [; warning-specifier : warning-number-list...] )
      while (true) {
        II = Tok.getIdentifierInfo();
        if (!II && !Tok.is(tok::numeric_constant)) {
          PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
          return;
        }

        // Figure out which warning specifier this is.
        bool SpecifierValid;
        StringRef Specifier;
        llvm::SmallString<1> SpecifierBuf;
        if (II) {
          Specifier = II->getName();
          SpecifierValid = llvm::StringSwitch<bool>(Specifier)
                               .Cases("default", "disable", "error", "once",
                                      "suppress", true)
                               .Default(false);
          // If we read a correct specifier, snatch next token (that should be
          // ":", checked later).
          if (SpecifierValid)
            PP.Lex(Tok);
        } else {
          // Token is a numeric constant. It should be either 1, 2, 3 or 4.
          uint64_t Value;
          Specifier = PP.getSpelling(Tok, SpecifierBuf);
          if (PP.parseSimpleIntegerLiteral(Tok, Value)) {
            SpecifierValid = (Value >= 1) && (Value <= 4);
          } else
            SpecifierValid = false;
          // Next token already snatched by parseSimpleIntegerLiteral.
        }

        if (!SpecifierValid) {
          PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
          return;
        }
        if (Tok.isNot(tok::colon)) {
          PP.Diag(Tok, diag::warn_pragma_warning_expected) << ":";
          return;
        }

        // Collect the warning ids.
        SmallVector<int, 4> Ids;
        PP.Lex(Tok);
        while (Tok.is(tok::numeric_constant)) {
          uint64_t Value;
          if (!PP.parseSimpleIntegerLiteral(Tok, Value) || Value == 0 ||
              Value > std::numeric_limits<int>::max()) {
            PP.Diag(Tok, diag::warn_pragma_warning_expected_number);
            return;
          }
          Ids.push_back(int(Value));
        }
        if (Callbacks)
          Callbacks->PragmaWarning(DiagLoc, Specifier, Ids);

        // Parse the next specifier if there is a semicolon.
        if (Tok.isNot(tok::semi))
          break;
        PP.Lex(Tok);
      }
    }

    if (Tok.isNot(tok::r_paren)) {
      PP.Diag(Tok, diag::warn_pragma_warning_expected) << ")";
      return;
    }

    PP.Lex(Tok);
    if (Tok.isNot(tok::eod))
      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma warning";
  }
};

/// "\#pragma execution_character_set(...)". MSVC supports this pragma only
/// for "UTF-8". We parse it and ignore it if UTF-8 is provided and warn
/// otherwise to avoid -Wunknown-pragma warnings.
struct PragmaExecCharsetHandler : public PragmaHandler {
  PragmaExecCharsetHandler() : PragmaHandler("execution_character_set") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &Tok) override {
    // Parse things like:
    // execution_character_set(push, "UTF-8")
    // execution_character_set(pop)
    SourceLocation DiagLoc = Tok.getLocation();
    PPCallbacks *Callbacks = PP.getPPCallbacks();

    PP.Lex(Tok);
    if (Tok.isNot(tok::l_paren)) {
      PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << "(";
      return;
    }

    PP.Lex(Tok);
    IdentifierInfo *II = Tok.getIdentifierInfo();

    if (II && II->isStr("push")) {
      // #pragma execution_character_set( push[ , string ] )
      PP.Lex(Tok);
      if (Tok.is(tok::comma)) {
        PP.Lex(Tok);

        std::string ExecCharset;
        if (!PP.FinishLexStringLiteral(Tok, ExecCharset,
                                       "pragma execution_character_set",
                                       /*AllowMacroExpansion=*/false))
          return;

        // MSVC supports either of these, but nothing else.
        if (ExecCharset != "UTF-8" && ExecCharset != "utf-8") {
          PP.Diag(Tok, diag::warn_pragma_exec_charset_push_invalid) << ExecCharset;
          return;
        }
      }
      if (Callbacks)
        Callbacks->PragmaExecCharsetPush(DiagLoc, "UTF-8");
    } else if (II && II->isStr("pop")) {
      // #pragma execution_character_set( pop )
      PP.Lex(Tok);
      if (Callbacks)
        Callbacks->PragmaExecCharsetPop(DiagLoc);
    } else {
      PP.Diag(Tok, diag::warn_pragma_exec_charset_spec_invalid);
      return;
    }

    if (Tok.isNot(tok::r_paren)) {
      PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << ")";
      return;
    }

    PP.Lex(Tok);
    if (Tok.isNot(tok::eod))
      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma execution_character_set";
  }
};

/// PragmaIncludeAliasHandler - "\#pragma include_alias("...")".
struct PragmaIncludeAliasHandler : public PragmaHandler {
  PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &IncludeAliasTok) override {
    PP.HandlePragmaIncludeAlias(IncludeAliasTok);
  }
};

/// PragmaMessageHandler - Handle the microsoft and gcc \#pragma message
/// extension.  The syntax is:
/// \code
///   #pragma message(string)
/// \endcode
/// OR, in GCC mode:
/// \code
///   #pragma message string
/// \endcode
/// string is a string, which is fully macro expanded, and permits string
/// concatenation, embedded escape characters, etc... See MSDN for more details.
/// Also handles \#pragma GCC warning and \#pragma GCC error which take the same
/// form as \#pragma message.
struct PragmaMessageHandler : public PragmaHandler {
private:
  // Which of message/warning/error this instance handles.
  const PPCallbacks::PragmaMessageKind Kind;
  // Namespace the handler is registered under (e.g. "GCC"), for callbacks.
  const StringRef Namespace;

  // Map the kind to either the handler's registered name or the phrase used
  // in diagnostics.
  static const char* PragmaKind(PPCallbacks::PragmaMessageKind Kind,
                                bool PragmaNameOnly = false) {
    switch (Kind) {
      case PPCallbacks::PMK_Message:
        return PragmaNameOnly ? "message" : "pragma message";
      case PPCallbacks::PMK_Warning:
        return PragmaNameOnly ? "warning" : "pragma warning";
      case PPCallbacks::PMK_Error:
        return PragmaNameOnly ? "error" : "pragma error";
    }
    llvm_unreachable("Unknown PragmaMessageKind!");
  }

public:
  PragmaMessageHandler(PPCallbacks::PragmaMessageKind Kind,
                       StringRef Namespace = StringRef())
      : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind),
        Namespace(Namespace) {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &Tok) override {
    SourceLocation MessageLoc = Tok.getLocation();
    PP.Lex(Tok);
    bool ExpectClosingParen = false;
    switch (Tok.getKind()) {
    case tok::l_paren:
      // We have a MSVC style pragma message.
      ExpectClosingParen = true;
      // Read the string.
      PP.Lex(Tok);
      break;
    case tok::string_literal:
      // We have a GCC style pragma message, and we just read the string.
      break;
    default:
      PP.Diag(MessageLoc, diag::err_pragma_message_malformed) << Kind;
      return;
    }

    std::string MessageString;
    if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind),
                                   /*AllowMacroExpansion=*/true))
      return;

    if (ExpectClosingParen) {
      if (Tok.isNot(tok::r_paren)) {
        PP.Diag(Tok.getLocation(), diag::err_pragma_message_malformed) << Kind;
        return;
      }
      PP.Lex(Tok);  // eat the r_paren.
    }

    if (Tok.isNot(tok::eod)) {
      PP.Diag(Tok.getLocation(), diag::err_pragma_message_malformed) << Kind;
      return;
    }

    // Output the message.
    PP.Diag(MessageLoc, (Kind == PPCallbacks::PMK_Error)
                          ? diag::err_pragma_message
                          : diag::warn_pragma_message) << MessageString;

    // If the pragma is lexically sound, notify any interested PPCallbacks.
    if (PPCallbacks *Callbacks = PP.getPPCallbacks())
      Callbacks->PragmaMessage(MessageLoc, Namespace, Kind, MessageString);
  }
};

/// Handle the clang \#pragma module import extension. The syntax is:
/// \code
///   #pragma clang module import some.module.name
/// \endcode
struct PragmaModuleImportHandler : public PragmaHandler {
  PragmaModuleImportHandler() : PragmaHandler("import") {}

  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
                    Token &Tok) override {
    SourceLocation ImportLoc = Tok.getLocation();

    // Read the module name.
    llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
        ModuleName;
    if (LexModuleName(PP, Tok, ModuleName))
      return;

    if (Tok.isNot(tok::eod))
      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";

    // If we have a non-empty module path, load the named module.
    Module *Imported =
        PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden,
                                      /*IsInclusionDirective=*/false);
    if (!Imported)
      return;

    // Make the module visible and tell the parser via an annotation token.
    PP.makeModuleVisible(Imported, ImportLoc);
    PP.EnterAnnotationToken(SourceRange(ImportLoc, ModuleName.back().second),
                            tok::annot_module_include, Imported);
    if (auto *CB = PP.getPPCallbacks())
      CB->moduleImport(ImportLoc, ModuleName, Imported);
  }
};

/// Handle the clang \#pragma module begin extension.
The syntax is: +/// \code +///   #pragma clang module begin some.module.name +///   ... +///   #pragma clang module end +/// \endcode +struct PragmaModuleBeginHandler : public PragmaHandler { +  PragmaModuleBeginHandler() : PragmaHandler("begin") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &Tok) override { +    SourceLocation BeginLoc = Tok.getLocation(); + +    // Read the module name. +    llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8> +        ModuleName; +    if (LexModuleName(PP, Tok, ModuleName)) +      return; + +    if (Tok.isNot(tok::eod)) +      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + +    // We can only enter submodules of the current module. +    StringRef Current = PP.getLangOpts().CurrentModule; +    if (ModuleName.front().first->getName() != Current) { +      PP.Diag(ModuleName.front().second, diag::err_pp_module_begin_wrong_module) +        << ModuleName.front().first << (ModuleName.size() > 1) +        << Current.empty() << Current; +      return; +    } + +    // Find the module we're entering. We require that a module map for it +    // be loaded or implicitly loadable. +    auto &HSI = PP.getHeaderSearchInfo(); +    Module *M = HSI.lookupModule(Current); +    if (!M) { +      PP.Diag(ModuleName.front().second, +              diag::err_pp_module_begin_no_module_map) << Current; +      return; +    } +    for (unsigned I = 1; I != ModuleName.size(); ++I) { +      auto *NewM = M->findOrInferSubmodule(ModuleName[I].first->getName()); +      if (!NewM) { +        PP.Diag(ModuleName[I].second, diag::err_pp_module_begin_no_submodule) +          << M->getFullModuleName() << ModuleName[I].first; +        return; +      } +      M = NewM; +    } + +    // If the module isn't available, it doesn't make sense to enter it. 
+    if (Preprocessor::checkModuleIsAvailable( +            PP.getLangOpts(), PP.getTargetInfo(), PP.getDiagnostics(), M)) { +      PP.Diag(BeginLoc, diag::note_pp_module_begin_here) +        << M->getTopLevelModuleName(); +      return; +    } + +    // Enter the scope of the submodule. +    PP.EnterSubmodule(M, BeginLoc, /*ForPragma*/true); +    PP.EnterAnnotationToken(SourceRange(BeginLoc, ModuleName.back().second), +                            tok::annot_module_begin, M); +  } +}; + +/// Handle the clang \#pragma module end extension. +struct PragmaModuleEndHandler : public PragmaHandler { +  PragmaModuleEndHandler() : PragmaHandler("end") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &Tok) override { +    SourceLocation Loc = Tok.getLocation(); + +    PP.LexUnexpandedToken(Tok); +    if (Tok.isNot(tok::eod)) +      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma"; + +    Module *M = PP.LeaveSubmodule(/*ForPragma*/true); +    if (M) +      PP.EnterAnnotationToken(SourceRange(Loc), tok::annot_module_end, M); +    else +      PP.Diag(Loc, diag::err_pp_module_end_without_module_begin); +  } +}; + +/// Handle the clang \#pragma module build extension. +struct PragmaModuleBuildHandler : public PragmaHandler { +  PragmaModuleBuildHandler() : PragmaHandler("build") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &Tok) override { +    PP.HandlePragmaModuleBuild(Tok); +  } +}; + +/// Handle the clang \#pragma module load extension. +struct PragmaModuleLoadHandler : public PragmaHandler { +  PragmaModuleLoadHandler() : PragmaHandler("load") {} + +  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, +                    Token &Tok) override { +    SourceLocation Loc = Tok.getLocation(); + +    // Read the module name. 
+    llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8>
+        ModuleName;
+    if (LexModuleName(PP, Tok, ModuleName))
+      return;
+
+    if (Tok.isNot(tok::eod))
+      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+    // Load the module, don't make it visible.
+    PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden,
+                                    /*IsInclusionDirective=*/false);
+  }
+};
+
+/// PragmaPushMacroHandler - "\#pragma push_macro" saves the value of the
+/// macro on the top of the stack, e.g. \#pragma push_macro("NAME").
+struct PragmaPushMacroHandler : public PragmaHandler {
+  PragmaPushMacroHandler() : PragmaHandler("push_macro") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &PushMacroTok) override {
+    PP.HandlePragmaPushMacro(PushMacroTok);
+  }
+};
+
+/// PragmaPopMacroHandler - "\#pragma pop_macro" sets the value of the
+/// macro to the value on the top of the stack, e.g. \#pragma pop_macro("NAME").
+struct PragmaPopMacroHandler : public PragmaHandler {
+  PragmaPopMacroHandler() : PragmaHandler("pop_macro") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &PopMacroTok) override {
+    PP.HandlePragmaPopMacro(PopMacroTok);
+  }
+};
+
+/// PragmaARCCFCodeAuditedHandler -
+///   \#pragma clang arc_cf_code_audited begin/end
+struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
+  PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &NameTok) override {
+    SourceLocation Loc = NameTok.getLocation();
+    bool IsBegin;
+
+    Token Tok;
+
+    // Lex the 'begin' or 'end'.
+    PP.LexUnexpandedToken(Tok);
+    const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+    if (BeginEnd && BeginEnd->isStr("begin")) {
+      IsBegin = true;
+    } else if (BeginEnd && BeginEnd->isStr("end")) {
+      IsBegin = false;
+    } else {
+      PP.Diag(Tok.getLocation(), diag::err_pp_arc_cf_code_audited_syntax);
+      return;
+    }
+
+    // Verify that this is followed by EOD.
+    PP.LexUnexpandedToken(Tok);
+    if (Tok.isNot(tok::eod))
+      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+    // The start location of the active audit.
+    SourceLocation BeginLoc = PP.getPragmaARCCFCodeAuditedInfo().second;
+
+    // The start location we want after processing this.
+    SourceLocation NewLoc;
+
+    if (IsBegin) {
+      // Complain about attempts to re-enter an audit (nesting is not allowed),
+      // but still adopt the new begin location.
+      if (BeginLoc.isValid()) {
+        PP.Diag(Loc, diag::err_pp_double_begin_of_arc_cf_code_audited);
+        PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+      }
+      NewLoc = Loc;
+    } else {
+      // Complain about attempts to leave an audit that doesn't exist.
+      if (!BeginLoc.isValid()) {
+        PP.Diag(Loc, diag::err_pp_unmatched_end_of_arc_cf_code_audited);
+        return;
+      }
+      NewLoc = SourceLocation();
+    }
+
+    PP.setPragmaARCCFCodeAuditedInfo(NameTok.getIdentifierInfo(), NewLoc);
+  }
+};
+
+/// PragmaAssumeNonNullHandler -
+///   \#pragma clang assume_nonnull begin/end
+struct PragmaAssumeNonNullHandler : public PragmaHandler {
+  PragmaAssumeNonNullHandler() : PragmaHandler("assume_nonnull") {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &NameTok) override {
+    SourceLocation Loc = NameTok.getLocation();
+    bool IsBegin;
+
+    Token Tok;
+
+    // Lex the 'begin' or 'end'.
+    PP.LexUnexpandedToken(Tok);
+    const IdentifierInfo *BeginEnd = Tok.getIdentifierInfo();
+    if (BeginEnd && BeginEnd->isStr("begin")) {
+      IsBegin = true;
+    } else if (BeginEnd && BeginEnd->isStr("end")) {
+      IsBegin = false;
+    } else {
+      PP.Diag(Tok.getLocation(), diag::err_pp_assume_nonnull_syntax);
+      return;
+    }
+
+    // Verify that this is followed by EOD.
+    PP.LexUnexpandedToken(Tok);
+    if (Tok.isNot(tok::eod))
+      PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma";
+
+    // The start location of the active audit.
+    SourceLocation BeginLoc = PP.getPragmaAssumeNonNullLoc();
+
+    // The start location we want after processing this.
+    SourceLocation NewLoc;
+    PPCallbacks *Callbacks = PP.getPPCallbacks();
+
+    if (IsBegin) {
+      // Complain about attempts to re-enter an audit.
+      if (BeginLoc.isValid()) {
+        PP.Diag(Loc, diag::err_pp_double_begin_of_assume_nonnull);
+        PP.Diag(BeginLoc, diag::note_pragma_entered_here);
+      }
+      NewLoc = Loc;
+      // Notify any registered preprocessor callbacks of the region boundary.
+      if (Callbacks)
+        Callbacks->PragmaAssumeNonNullBegin(NewLoc);
+    } else {
+      // Complain about attempts to leave an audit that doesn't exist.
+      if (!BeginLoc.isValid()) {
+        PP.Diag(Loc, diag::err_pp_unmatched_end_of_assume_nonnull);
+        return;
+      }
+      NewLoc = SourceLocation();
+      if (Callbacks)
+        Callbacks->PragmaAssumeNonNullEnd(NewLoc);
+    }
+
+    PP.setPragmaAssumeNonNullLoc(NewLoc);
+  }
+};
+
+/// Handle "\#pragma region [...]"
+///
+/// The syntax is
+/// \code
+///   #pragma region [optional name]
+///   #pragma endregion [optional comment]
+/// \endcode
+///
+/// \note This is
+/// <a href="http://msdn.microsoft.com/en-us/library/b6xkz944(v=vs.80).aspx">editor-only</a>
+/// pragma, just skipped by compiler.
+struct PragmaRegionHandler : public PragmaHandler {
+  PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {}
+
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &NameTok) override {
+    // #pragma region: endregion matches can be verified
+    // __pragma(region): no sense, but ignored by msvc
+    // _Pragma is not valid for MSVC, but there isn't any point
+    // to handle a _Pragma differently.
+    // The handler body is intentionally empty: the pragma is editor-only.
+  }
+};
+
+} // namespace
+
+/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
+/// \#pragma GCC poison/system_header/dependency and \#pragma once.
+void Preprocessor::RegisterBuiltinPragmas() {
+  AddPragmaHandler(new PragmaOnceHandler());
+  AddPragmaHandler(new PragmaMarkHandler());
+  AddPragmaHandler(new PragmaPushMacroHandler());
+  AddPragmaHandler(new PragmaPopMacroHandler());
+  AddPragmaHandler(new PragmaMessageHandler(PPCallbacks::PMK_Message));
+
+  // #pragma GCC ...
+  AddPragmaHandler("GCC", new PragmaPoisonHandler());
+  AddPragmaHandler("GCC", new PragmaSystemHeaderHandler());
+  AddPragmaHandler("GCC", new PragmaDependencyHandler());
+  AddPragmaHandler("GCC", new PragmaDiagnosticHandler("GCC"));
+  AddPragmaHandler("GCC", new PragmaMessageHandler(PPCallbacks::PMK_Warning,
+                                                   "GCC"));
+  AddPragmaHandler("GCC", new PragmaMessageHandler(PPCallbacks::PMK_Error,
+                                                   "GCC"));
+  // #pragma clang ...
+  AddPragmaHandler("clang", new PragmaPoisonHandler());
+  AddPragmaHandler("clang", new PragmaSystemHeaderHandler());
+  AddPragmaHandler("clang", new PragmaDebugHandler());
+  AddPragmaHandler("clang", new PragmaDependencyHandler());
+  AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));
+  AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());
+  AddPragmaHandler("clang", new PragmaAssumeNonNullHandler());
+
+  // #pragma clang module ...
+  auto *ModuleHandler = new PragmaNamespace("module");
+  AddPragmaHandler("clang", ModuleHandler);
+  ModuleHandler->AddPragma(new PragmaModuleImportHandler());
+  ModuleHandler->AddPragma(new PragmaModuleBeginHandler());
+  ModuleHandler->AddPragma(new PragmaModuleEndHandler());
+  ModuleHandler->AddPragma(new PragmaModuleBuildHandler());
+  ModuleHandler->AddPragma(new PragmaModuleLoadHandler());
+
+  // Add region pragmas.
+  AddPragmaHandler(new PragmaRegionHandler("region"));
+  AddPragmaHandler(new PragmaRegionHandler("endregion"));
+
+  // MS extensions.
+  if (LangOpts.MicrosoftExt) {
+    AddPragmaHandler(new PragmaWarningHandler());
+    AddPragmaHandler(new PragmaExecCharsetHandler());
+    AddPragmaHandler(new PragmaIncludeAliasHandler());
+    AddPragmaHandler(new PragmaHdrstopHandler());
+  }
+
+  // Pragmas added by plugins (instantiated from the PragmaHandlerRegistry).
+  for (PragmaHandlerRegistry::iterator it = PragmaHandlerRegistry::begin(),
+                                       ie = PragmaHandlerRegistry::end();
+       it != ie; ++it) {
+    AddPragmaHandler(it->instantiate().release());
+  }
+}
+
+/// Ignore all pragmas, useful for modes such as -Eonly which would otherwise
+/// warn about those pragmas being unknown.
+void Preprocessor::IgnorePragmas() {
+  AddPragmaHandler(new EmptyPragmaHandler());
+  // Also ignore all pragmas in all namespaces created
+  // in Preprocessor::RegisterBuiltinPragmas().
+  AddPragmaHandler("GCC", new EmptyPragmaHandler());
+  AddPragmaHandler("clang", new EmptyPragmaHandler());
+}
diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp
new file mode 100644
index 000000000000..115256db4809
--- /dev/null
+++ b/clang/lib/Lex/PreprocessingRecord.cpp
@@ -0,0 +1,516 @@
+//===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the PreprocessingRecord class, which maintains a record
+//  of what occurred during preprocessing, and its helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PreprocessingRecord.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Capacity.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+
+ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() =
+    default;
+
+InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,
+                                       InclusionKind Kind, StringRef FileName,
+                                       bool InQuotes, bool ImportedModule,
+                                       const FileEntry *File, SourceRange Range)
+    : PreprocessingDirective(InclusionDirectiveKind, Range), InQuotes(InQuotes),
+      Kind(Kind), ImportedModule(ImportedModule), File(File) {
+  // Copy the filename into the record's allocator (NUL-terminated) so the
+  // string outlives the caller's buffer.
+  char *Memory = (char *)PPRec.Allocate(FileName.size() + 1, alignof(char));
+  memcpy(Memory, FileName.data(), FileName.size());
+  Memory[FileName.size()] = 0;
+  this->FileName = StringRef(Memory, FileName.size());
+}
+
+PreprocessingRecord::PreprocessingRecord(SourceManager &SM) : SourceMgr(SM) {}
+
+/// Returns a pair of [Begin, End) iterators of
preprocessed entities
+/// that source range \p Range encompasses.
+llvm::iterator_range<PreprocessingRecord::iterator>
+PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) {
+  if (Range.isInvalid())
+    return llvm::make_range(iterator(), iterator());
+
+  // Reuse the result of the previous query if it covered exactly this range.
+  if (CachedRangeQuery.Range == Range) {
+    return llvm::make_range(iterator(this, CachedRangeQuery.Result.first),
+                            iterator(this, CachedRangeQuery.Result.second));
+  }
+
+  std::pair<int, int> Res = getPreprocessedEntitiesInRangeSlow(Range);
+
+  CachedRangeQuery.Range = Range;
+  CachedRangeQuery.Result = Res;
+
+  return llvm::make_range(iterator(this, Res.first),
+                          iterator(this, Res.second));
+}
+
+/// Returns true if \p PPE is non-null, has a valid begin location, and that
+/// location (mapped to its spelling file) lies inside file \p FID.
+static bool isPreprocessedEntityIfInFileID(PreprocessedEntity *PPE, FileID FID,
+                                           SourceManager &SM) {
+  assert(FID.isValid());
+  if (!PPE)
+    return false;
+
+  SourceLocation Loc = PPE->getSourceRange().getBegin();
+  if (Loc.isInvalid())
+    return false;
+
+  return SM.isInFileID(SM.getFileLoc(Loc), FID);
+}
+
+/// Returns true if the preprocessed entity that \p PPEI iterator
+/// points to is coming from the file \p FID.
+///
+/// Can be used to avoid implicit deserializations of preallocated
+/// preprocessed entities if we only care about entities of a specific file
+/// and not from files \#included in the range given at
+/// \see getPreprocessedEntitiesInRange.
+bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
+  if (FID.isInvalid())
+    return false;
+
+  // A negative position indexes the loaded (externally deserialized)
+  // entities; a non-negative position indexes the local ones.
+  int Pos = std::distance(iterator(this, 0), PPEI);
+  if (Pos < 0) {
+    if (unsigned(-Pos-1) >= LoadedPreprocessedEntities.size()) {
+      assert(0 && "Out-of bounds loaded preprocessed entity");
+      return false;
+    }
+    assert(ExternalSource && "No external source to load from");
+    unsigned LoadedIndex = LoadedPreprocessedEntities.size()+Pos;
+    if (PreprocessedEntity *PPE = LoadedPreprocessedEntities[LoadedIndex])
+      return isPreprocessedEntityIfInFileID(PPE, FID, SourceMgr);
+
+    // See if the external source can see if the entity is in the file without
+    // deserializing it.
+    Optional<bool> IsInFile =
+        ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);
+    if (IsInFile.hasValue())
+      return IsInFile.getValue();
+
+    // The external source did not provide a definite answer, go and deserialize
+    // the entity to check it.
+    return isPreprocessedEntityIfInFileID(
+                                       getLoadedPreprocessedEntity(LoadedIndex),
+                                          FID, SourceMgr);
+  }
+
+  if (unsigned(Pos) >= PreprocessedEntities.size()) {
+    assert(0 && "Out-of bounds local preprocessed entity");
+    return false;
+  }
+  return isPreprocessedEntityIfInFileID(PreprocessedEntities[Pos],
+                                        FID, SourceMgr);
+}
+
+/// Returns a pair of [Begin, End) iterators of preprocessed entities
+/// that source range \p Range encompasses.
+std::pair<int, int>
+PreprocessingRecord::getPreprocessedEntitiesInRangeSlow(SourceRange Range) {
+  assert(Range.isValid());
+  assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+  std::pair<unsigned, unsigned>
+    Local = findLocalPreprocessedEntitiesInRange(Range);
+
+  // Check if range spans local entities.
+  if (!ExternalSource || SourceMgr.isLocalSourceLocation(Range.getBegin()))
+    return std::make_pair(Local.first, Local.second);
+
+  std::pair<unsigned, unsigned>
+    Loaded = ExternalSource->findPreprocessedEntitiesInRange(Range);
+
+  // The loaded set is empty; the range only spans local entities.
+  if (Loaded.first == Loaded.second)
+    return std::make_pair(Local.first, Local.second);
+
+  unsigned TotalLoaded = LoadedPreprocessedEntities.size();
+
+  // Check if range spans loaded entities.
+  if (Local.first == Local.second)
+    return std::make_pair(int(Loaded.first)-TotalLoaded,
+                          int(Loaded.second)-TotalLoaded);
+
+  // Range spans loaded and local entities.
+  return std::make_pair(int(Loaded.first)-TotalLoaded, Local.second);
+}
+
+std::pair<unsigned, unsigned>
+PreprocessingRecord::findLocalPreprocessedEntitiesInRange(
+                                                      SourceRange Range) const {
+  if (Range.isInvalid())
+    return std::make_pair(0,0);
+  assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
+
+  unsigned Begin = findBeginLocalPreprocessedEntity(Range.getBegin());
+  unsigned End = findEndLocalPreprocessedEntity(Range.getEnd());
+  return std::make_pair(Begin, End);
+}
+
+namespace {
+
+/// Orders preprocessed entities (and bare locations) by the source location
+/// selected by \p getRangeLoc (begin or end of the entity's range), using
+/// translation-unit order.
+template <SourceLocation (SourceRange::*getRangeLoc)() const>
+struct PPEntityComp {
+  const SourceManager &SM;
+
+  explicit PPEntityComp(const SourceManager &SM) : SM(SM) {}
+
+  bool operator()(PreprocessedEntity *L, PreprocessedEntity *R) const {
+    SourceLocation LHS = getLoc(L);
+    SourceLocation RHS = getLoc(R);
+    return SM.isBeforeInTranslationUnit(LHS, RHS);
+  }
+
+  bool operator()(PreprocessedEntity *L, SourceLocation RHS) const {
+    SourceLocation LHS = getLoc(L);
+    return SM.isBeforeInTranslationUnit(LHS, RHS);
+  }
+
+  bool operator()(SourceLocation LHS, PreprocessedEntity *R) const {
+    SourceLocation RHS = getLoc(R);
+    return SM.isBeforeInTranslationUnit(LHS, RHS);
+  }
+
+  
SourceLocation getLoc(PreprocessedEntity *PPE) const {
+    SourceRange Range = PPE->getSourceRange();
+    return (Range.*getRangeLoc)();
+  }
+};
+
+} // namespace
+
+unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity(
+                                                     SourceLocation Loc) const {
+  if (SourceMgr.isLoadedSourceLocation(Loc))
+    return 0;
+
+  size_t Count = PreprocessedEntities.size();
+  size_t Half;
+  std::vector<PreprocessedEntity *>::const_iterator
+    First = PreprocessedEntities.begin();
+  std::vector<PreprocessedEntity *>::const_iterator I;
+
+  // Do a binary search manually instead of using std::lower_bound because
+  // the end locations of entities may be unordered (when a macro expansion
+  // is inside another macro argument), but for this case it is not important
+  // whether we get the first macro expansion or its containing macro.
+  while (Count > 0) {
+    Half = Count/2;
+    I = First;
+    std::advance(I, Half);
+    if (SourceMgr.isBeforeInTranslationUnit((*I)->getSourceRange().getEnd(),
+                                            Loc)){
+      First = I;
+      ++First;
+      Count = Count - Half - 1;
+    } else
+      Count = Half;
+  }
+
+  return First - PreprocessedEntities.begin();
+}
+
+unsigned
+PreprocessingRecord::findEndLocalPreprocessedEntity(SourceLocation Loc) const {
+  if (SourceMgr.isLoadedSourceLocation(Loc))
+    return 0;
+
+  // Begin locations of local entities are ordered, so a plain upper_bound
+  // suffices here.
+  auto I = llvm::upper_bound(PreprocessedEntities, Loc,
+                             PPEntityComp<&SourceRange::getBegin>(SourceMgr));
+  return I - PreprocessedEntities.begin();
+}
+
+PreprocessingRecord::PPEntityID
+PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) {
+  assert(Entity);
+  SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
+
+  if (isa<MacroDefinitionRecord>(Entity)) {
+    assert((PreprocessedEntities.empty() ||
+            !SourceMgr.isBeforeInTranslationUnit(
+                BeginLoc,
+                
PreprocessedEntities.back()->getSourceRange().getBegin())) &&
+           "a macro definition was encountered out-of-order");
+    PreprocessedEntities.push_back(Entity);
+    return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
+  }
+
+  // Check normal case, this entity begin location is after the previous one.
+  if (PreprocessedEntities.empty() ||
+      !SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+                   PreprocessedEntities.back()->getSourceRange().getBegin())) {
+    PreprocessedEntities.push_back(Entity);
+    return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
+  }
+
+  // The entity's location is not after the previous one; this can happen with
+  // include directives that form the filename using macros, e.g.:
+  // "#include MACRO(STUFF)"
+  // or with macro expansions inside macro arguments where the arguments are
+  // not expanded in the same order as listed, e.g.:
+  // \code
+  //  #define M1 1
+  //  #define M2 2
+  //  #define FM(x,y) y x
+  //  FM(M1, M2)
+  // \endcode
+
+  using pp_iter = std::vector<PreprocessedEntity *>::iterator;
+
+  // Usually there are few macro expansions when defining the filename, do a
+  // linear search (backwards from the end) for a few entities.
+  unsigned count = 0;
+  for (pp_iter RI    = PreprocessedEntities.end(),
+               Begin = PreprocessedEntities.begin();
+       RI != Begin && count < 4; --RI, ++count) {
+    pp_iter I = RI;
+    --I;
+    if (!SourceMgr.isBeforeInTranslationUnit(BeginLoc,
+                                           (*I)->getSourceRange().getBegin())) {
+      pp_iter insertI = PreprocessedEntities.insert(RI, Entity);
+      return getPPEntityID(insertI - PreprocessedEntities.begin(),
+                           /*isLoaded=*/false);
+    }
+  }
+
+  // Linear search unsuccessful. Do a binary search.
+  pp_iter I =
+      llvm::upper_bound(PreprocessedEntities, BeginLoc,
+                        PPEntityComp<&SourceRange::getBegin>(SourceMgr));
+  pp_iter insertI = PreprocessedEntities.insert(I, Entity);
+  return getPPEntityID(insertI - PreprocessedEntities.begin(),
+                       /*isLoaded=*/false);
+}
+
+void PreprocessingRecord::SetExternalSource(
+                                    ExternalPreprocessingRecordSource &Source) {
+  assert(!ExternalSource &&
+         "Preprocessing record already has an external source");
+  ExternalSource = &Source;
+}
+
+unsigned PreprocessingRecord::allocateLoadedEntities(unsigned NumEntities) {
+  unsigned Result = LoadedPreprocessedEntities.size();
+  LoadedPreprocessedEntities.resize(LoadedPreprocessedEntities.size()
+                                    + NumEntities);
+  return Result;
+}
+
+unsigned PreprocessingRecord::allocateSkippedRanges(unsigned NumRanges) {
+  unsigned Result = SkippedRanges.size();
+  SkippedRanges.resize(SkippedRanges.size() + NumRanges);
+  // The new ranges are default-constructed (invalid) and will be filled in
+  // lazily from the external source; see ensureSkippedRangesLoaded().
+  SkippedRangesAllLoaded = false;
+  return Result;
+}
+
+void PreprocessingRecord::ensureSkippedRangesLoaded() {
+  if (SkippedRangesAllLoaded || !ExternalSource)
+    return;
+  for (unsigned Index = 0; Index != SkippedRanges.size(); ++Index) {
+    if (SkippedRanges[Index].isInvalid())
+      SkippedRanges[Index] = ExternalSource->ReadSkippedRange(Index);
+  }
+  SkippedRangesAllLoaded = true;
+}
+
+void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
+                                                  MacroDefinitionRecord *Def) {
+  MacroDefinitions[Macro] = Def;
+}
+
+/// Retrieve the preprocessed entity at the given ID.
+PreprocessedEntity *PreprocessingRecord::getPreprocessedEntity(PPEntityID PPID){
+  // Negative IDs refer to loaded entities, positive IDs to local ones,
+  // and ID 0 means "no entity".
+  if (PPID.ID < 0) {
+    unsigned Index = -PPID.ID - 1;
+    assert(Index < LoadedPreprocessedEntities.size() &&
+           "Out-of bounds loaded preprocessed entity");
+    return getLoadedPreprocessedEntity(Index);
+  }
+
+  if (PPID.ID == 0)
+    return nullptr;
+  unsigned Index = PPID.ID - 1;
+  assert(Index < PreprocessedEntities.size() &&
+         "Out-of bounds local preprocessed entity");
+  return PreprocessedEntities[Index];
+}
+
+/// Retrieve the loaded preprocessed entity at the given index.
+PreprocessedEntity *
+PreprocessingRecord::getLoadedPreprocessedEntity(unsigned Index) {
+  assert(Index < LoadedPreprocessedEntities.size() &&
+         "Out-of bounds loaded preprocessed entity");
+  assert(ExternalSource && "No external source to load from");
+  PreprocessedEntity *&Entity = LoadedPreprocessedEntities[Index];
+  if (!Entity) {
+    Entity = ExternalSource->ReadPreprocessedEntity(Index);
+    // Cache an invalid placeholder on failure so we don't retry the load.
+    if (!Entity) // Failed to load.
+      Entity = new (*this)
+         PreprocessedEntity(PreprocessedEntity::InvalidKind, SourceRange());
+  }
+  return Entity;
+}
+
+MacroDefinitionRecord *
+PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
+  llvm::DenseMap<const MacroInfo *, MacroDefinitionRecord *>::iterator Pos =
+      MacroDefinitions.find(MI);
+  if (Pos == MacroDefinitions.end())
+    return nullptr;
+
+  return Pos->second;
+}
+
+void PreprocessingRecord::addMacroExpansion(const Token &Id,
+                                            const MacroInfo *MI,
+                                            SourceRange Range) {
+  // We don't record nested macro expansions.
+  if (Id.getLocation().isMacroID())
+    return;
+
+  if (MI->isBuiltinMacro())
+    addPreprocessedEntity(new (*this)
+                              MacroExpansion(Id.getIdentifierInfo(), Range));
+  else if (MacroDefinitionRecord *Def = findMacroDefinition(MI))
+    addPreprocessedEntity(new (*this) MacroExpansion(Def, Range));
+}
+
+void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok,
+                                const MacroDefinition &MD) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok,
+                                 const MacroDefinition &MD) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::Defined(const Token &MacroNameTok,
+                                  const MacroDefinition &MD,
+                                  SourceRange Range) {
+  // This is not actually a macro expansion but record it as a macro reference.
+  if (MD)
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
+                      MacroNameTok.getLocation());
+}
+
+void PreprocessingRecord::SourceRangeSkipped(SourceRange Range,
+                                             SourceLocation EndifLoc) {
+  assert(Range.isValid());
+  SkippedRanges.emplace_back(Range.getBegin(), EndifLoc);
+}
+
+void PreprocessingRecord::MacroExpands(const Token &Id,
+                                       const MacroDefinition &MD,
+                                       SourceRange Range,
+                                       const MacroArgs *Args) {
+  addMacroExpansion(Id, MD.getMacroInfo(), Range);
+}
+
+void PreprocessingRecord::MacroDefined(const Token &Id,
+                                       const MacroDirective *MD) {
+  const MacroInfo *MI = MD->getMacroInfo();
+  SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
+  // Record the definition and remember it so later expansions of this
+  // MacroInfo can refer back to it.
+  MacroDefinitionRecord *Def =
+      new (*this) MacroDefinitionRecord(Id.getIdentifierInfo(), R);
+  addPreprocessedEntity(Def);
+  MacroDefinitions[MI] = Def;
+}
+
+void PreprocessingRecord::MacroUndefined(const Token &Id,
+                                         const MacroDefinition &MD,
+                                         const MacroDirective *Undef) {
+  // Drop the definition records for every definition of this macro name.
+  MD.forAllDefinitions([&](MacroInfo *MI) { MacroDefinitions.erase(MI); });
+}
+
+void PreprocessingRecord::InclusionDirective(
+    SourceLocation HashLoc,
+    const Token &IncludeTok,
+    StringRef FileName,
+    bool IsAngled,
+    CharSourceRange FilenameRange,
+    const FileEntry *File,
+    StringRef SearchPath,
+    StringRef RelativePath,
+    const Module *Imported,
+    SrcMgr::CharacteristicKind FileType) {
+  InclusionDirective::InclusionKind Kind = InclusionDirective::Include;
+
+  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
+  case tok::pp_include:
+    Kind = InclusionDirective::Include;
+    break;
+
+  case tok::pp_import:
+    Kind = InclusionDirective::Import;
+    break;
+
+  case 
tok::pp_include_next:
+    Kind = InclusionDirective::IncludeNext;
+    break;
+
+  case tok::pp___include_macros:
+    Kind = InclusionDirective::IncludeMacros;
+    break;
+
+  default:
+    llvm_unreachable("Unknown include directive kind");
+  }
+
+  // Compute a token-range end location for the recorded directive.
+  SourceLocation EndLoc;
+  if (!IsAngled) {
+    EndLoc = FilenameRange.getBegin();
+  } else {
+    EndLoc = FilenameRange.getEnd();
+    if (FilenameRange.isCharRange())
+      EndLoc = EndLoc.getLocWithOffset(-1); // the InclusionDirective expects
+                                            // a token range.
+  }
+  clang::InclusionDirective *ID =
+      new (*this) clang::InclusionDirective(*this, Kind, FileName, !IsAngled,
+                                            (bool)Imported, File,
+                                            SourceRange(HashLoc, EndLoc));
+  addPreprocessedEntity(ID);
+}
+
+size_t PreprocessingRecord::getTotalMemory() const {
+  return BumpAlloc.getTotalMemory()
+    + llvm::capacity_in_bytes(MacroDefinitions)
+    + llvm::capacity_in_bytes(PreprocessedEntities)
+    + llvm::capacity_in_bytes(LoadedPreprocessedEntities)
+    + llvm::capacity_in_bytes(SkippedRanges);
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
new file mode 100644
index 000000000000..82007732a9b1
--- /dev/null
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -0,0 +1,1401 @@
+//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// Options to support:
+//   -H       - Print the name of each header file used.
+//   -d[DNI] - Dump various things. +//   -fworking-directory - #line's with preprocessor's working dir. +//   -fpreprocessed +//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD +//   -W* +//   -w +// +// Messages to emit: +//   "Multiple include guards may be useful for:\n" +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemStatCache.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/ExternalPreprocessorSource.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/MacroArgs.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/PreprocessingRecord.h" +#include "clang/Lex/PreprocessorLexer.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Lex/Token.h" +#include "clang/Lex/TokenLexer.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Capacity.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +using namespace clang; + +LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) + 
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;

/// Construct a preprocessor over the given diagnostics/source/header-search
/// state.  Keyword identifiers are intentionally NOT added here; see the
/// comment on the \c Identifiers initializer below and
/// Preprocessor::Initialize().
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      // As the language options may have not been loaded yet (when
      // deserializing an ASTUnit), adding keywords to the identifier table is
      // deferred to Preprocessor::Initialize().
      Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
      TUKind(TUKind), SkipMainFilePreamble(0, true),
      CurSubmoduleState(&NullSubmoduleState) {
  OwnsHeaderSearch = OwnsHeaders;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  ArgMacro = nullptr;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
  // a macro. They get unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
  if (getLangOpts().CPlusPlus2a) {
    (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
    SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
  } else {
    Ident__VA_OPT__ = nullptr;
  }

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // Borland mode exposes the SEH helper identifiers; otherwise they all stay
  // null so HandleIdentifier never treats them specially.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }

  // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
  if (usingPCHWithPragmaHdrStop())
    SkippingUntilPragmaHdrStop = true;

  // If using a PCH with a through header, start skipping tokens.
  // Note: use this->PPOpts here — the PPOpts parameter was moved into the
  // member in the initializer list above.
  if (!this->PPOpts->PCHThroughHeader.empty() &&
      !this->PPOpts->ImplicitPCHInclude.empty())
    SkippingUntilPCHThroughHeader = true;

  if (this->PPOpts->GeneratePreamble)
    PreambleConditionalStack.startRecording();

  ExcludedConditionalDirectiveSkipMappings =
      this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
  if (ExcludedConditionalDirectiveSkipMappings)
    ExcludedConditionalDirectiveSkipMappings->clear();
}

/// Tear down the preprocessor: destroys all macro state, cached token lexers
/// and macro-argument lists, and the HeaderSearch if this object owns it.
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Destroy any macro definitions.
  while (MacroInfoChain *I = MIChainHead) {
    MIChainHead = I->Next;
    I->~MacroInfoChain();
  }

  // Free any cached macro expanders.
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
  // before the code below that frees up the MacroArgCache list.
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
  CurTokenLexer.reset();

  // Free any cached MacroArgs.
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    ArgList = ArgList->deallocate();

  // Delete the header search info, if we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}

/// Complete initialization once the target is known: wires up builtin and
/// header-search target info and adds the language's keywords to the
/// identifier table (deferred from the constructor).
void Preprocessor::Initialize(const TargetInfo &Target,
                              const TargetInfo *AuxTarget) {
  assert((!this->Target || this->Target == &Target) &&
         "Invalid override of target information");
  this->Target = &Target;

  assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
         "Invalid override of aux target information.");
  this->AuxTarget = AuxTarget;

  // Initialize information about built-ins.
  BuiltinInfo.InitializeTarget(Target, AuxTarget);
  HeaderInfo.setTarget(Target);

  // Populate the identifier table with info about keywords for the current
  // language.
  Identifiers.AddKeywords(LangOpts);
}

/// Prepare this preprocessor for reuse on a model file: resets the
/// entered-file count, swaps in a fresh pragma namespace (saving the current
/// one for FinalizeForModelFile), and clears the predefines FileID.
void Preprocessor::InitializeForModelFile() {
  NumEnteredSourceFiles = 0;

  // Reset pragmas
  PragmaHandlersBackup = std::move(PragmaHandlers);
  PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
  RegisterBuiltinPragmas();

  // Reset PredefinesFileID
  PredefinesFileID = FileID();
}

/// Undo InitializeForModelFile: restores the saved pragma handlers.
void Preprocessor::FinalizeForModelFile() {
  NumEnteredSourceFiles = 1;

  PragmaHandlers = std::move(PragmaHandlersBackup);
}

/// Print a token (kind, spelling, optional flags and location) to stderr,
/// for debugging.
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
               << getSpelling(Tok) << "'";

  if (!DumpFlags) return;

  llvm::errs() << "\t";
  if (Tok.isAtStartOfLine())
    llvm::errs() << " [StartOfLine]";
  if (Tok.hasLeadingSpace())
    llvm::errs() << " [LeadingSpace]";
  if (Tok.isExpandDisabled())
    llvm::errs() << " [ExpandDisabled]";
  if (Tok.needsCleaning()) {
    // Show the raw (uncleaned) text straight from the source buffer.
    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
                 << "']";
  }

  llvm::errs() << "\tLoc=<";
  DumpLocation(Tok.getLocation());
  llvm::errs() << ">";
}

/// Print a source location to stderr, for debugging.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  Loc.print(llvm::errs(), SourceMgr);
}

/// Print a macro's replacement-token list to stderr, for debugging.
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
  llvm::errs() << "MACRO: ";
  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
    DumpToken(MI.getReplacementToken(i));
    llvm::errs() << "  ";
  }
  llvm::errs() << "\n";
}

/// Print directive/expansion counters and memory-usage statistics to stderr.
void Preprocessor::PrintStats() {
  llvm::errs() << "\n*** Preprocessor Stats:\n";
  llvm::errs() << NumDirectives << " directives found:\n";
  llvm::errs() << "  " << NumDefined << " #define.\n";
  llvm::errs() << "  " << NumUndefined << " #undef.\n";
  llvm::errs() << "  #include/#include_next/#import:\n";
  llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
  llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
  llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
  llvm::errs() << "  " << NumElse << " #else/#elif.\n";
  llvm::errs() << "  " << NumEndif << " #endif.\n";
  llvm::errs() << "  " << NumPragma << " #pragma.\n";
  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";

  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
             << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
             << NumFastMacroExpanded << " on the fast path.\n";
  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
             << " token paste (##) operations performed, "
             << NumFastTokenPaste << " on the fast path.\n";

  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";

  llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
  llvm::errs() << "\n  Macro Expanded Tokens: "
               << llvm::capacity_in_bytes(MacroExpandedTokens);
  llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
  // FIXME: List information for all submodules.
  llvm::errs() << "\n  Macros: "
               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
  llvm::errs() << "\n  #pragma push_macro Info: "
               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
  llvm::errs() << "\n  Poison Reasons: "
               << llvm::capacity_in_bytes(PoisonReasons);
  llvm::errs() << "\n  Comment Handlers: "
               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
}

/// Begin iteration over the current submodule's macro table.  If requested
/// (and not already done), macros from the external source are read in first
/// so the iteration is complete.
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  // Make sure we cover all macros in visible modules.
  for (const ModuleMacro &Macro : ModuleMacros)
    CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));

  return CurSubmoduleState->Macros.begin();
}

/// Approximate total heap usage of the preprocessor, summed from its major
/// allocators and containers (mirrors the breakdown printed by PrintStats).
size_t Preprocessor::getTotalMemory() const {
  return BP.getTotalMemory()
    + llvm::capacity_in_bytes(MacroExpandedTokens)
    + Predefines.capacity() /* Predefines buffer. */
    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
    // and ModuleMacros.
    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
    + llvm::capacity_in_bytes(PoisonReasons)
    + llvm::capacity_in_bytes(CommentHandlers);
}

/// End iterator counterpart of macro_begin; performs the same one-time read
/// of externally defined macros when requested.
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  return CurSubmoduleState->Macros.end();
}

/// Compares macro tokens with a specified token value sequence.
static bool MacroDefinitionEquals(const MacroInfo *MI,
                                  ArrayRef<TokenValue> Tokens) {
  return Tokens.size() == MI->getNumTokens() &&
      std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
}

/// Return the name of the object-like macro, defined latest before \p Loc,
/// whose replacement list matches \p Tokens; empty StringRef if none matches.
StringRef Preprocessor::getLastMacroWithSpelling(
                                    SourceLocation Loc,
                                    ArrayRef<TokenValue> Tokens) const {
  SourceLocation BestLocation;
  StringRef BestSpelling;
  for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
       I != E; ++I) {
    const MacroDirective::DefInfo
      Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
    if (!Def || !Def.getMacroInfo())
      continue;
    if (!Def.getMacroInfo()->isObjectLike())
      continue;
    if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
      continue;
    SourceLocation Location = Def.getLocation();
    // Choose the macro defined latest.
    if (BestLocation.isInvalid() ||
        (Location.isValid() &&
         SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
      BestLocation = Location;
      BestSpelling = I->first->getName();
    }
  }
  return BestSpelling;
}

/// Re-derive CurLexerKind from whichever lexer object is currently active.
void Preprocessor::recomputeCurLexerKind() {
  if (CurLexer)
    CurLexerKind = CLK_Lexer;
  else if (CurTokenLexer)
    CurLexerKind = CLK_TokenLexer;
  else
    CurLexerKind = CLK_CachingLexer;
}

/// Arrange for code completion at (CompleteLine, CompleteColumn) of \p File:
/// records the completion offset and overrides the file's contents with a
/// copy that has a '\0' inserted at that position.
///
/// \returns true if the file's buffer could not be loaded.
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
                                          unsigned CompleteLine,
                                          unsigned CompleteColumn) {
  assert(File);
  assert(CompleteLine && CompleteColumn && "Starts from 1:1");
  assert(!CodeCompletionFile && "Already set");

  using llvm::MemoryBuffer;

  // Load the actual file's contents.
  bool Invalid = false;
  const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
  if (Invalid)
    return true;

  // Find the byte position of the truncation point.
  const char *Position = Buffer->getBufferStart();
  for (unsigned Line = 1; Line < CompleteLine; ++Line) {
    for (; *Position; ++Position) {
      if (*Position != '\r' && *Position != '\n')
        continue;

      // Eat \r\n or \n\r as a single line.
      // NOTE: reading Position[1] relies on the buffer being NUL-terminated
      // (as MemoryBuffer guarantees).
      if ((Position[1] == '\r' || Position[1] == '\n') &&
          Position[0] != Position[1])
        ++Position;
      ++Position;
      break;
    }
  }

  Position += CompleteColumn - 1;

  // If pointing inside the preamble, adjust the position at the beginning of
  // the file after the preamble.
  if (SkipMainFilePreamble.first &&
      SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
    if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
      Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
  }

  // Clamp to the end of the buffer in case line/column overshot it.
  if (Position > Buffer->getBufferEnd())
    Position = Buffer->getBufferEnd();

  CodeCompletionFile = File;
  CodeCompletionOffset = Position - Buffer->getBufferStart();

  // Build a copy of the buffer with a NUL spliced in at the completion point
  // and install it as the file's contents.
  auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
      Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
  char *NewBuf = NewBuffer->getBufferStart();
  char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
  *NewPos = '\0';
  std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
  SourceMgr.overrideFileContents(File, std::move(NewBuffer));

  return false;
}

/// Forward an included-file completion to the handler (if any) and mark
/// completion as reached.
void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
                                            bool IsAngled) {
  if (CodeComplete)
    CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
  setCodeCompletionReached();
}

/// Forward a natural-language (e.g. comment/string) completion to the handler
/// (if any) and mark completion as reached.
void Preprocessor::CodeCompleteNaturalLanguage() {
  if (CodeComplete)
    CodeComplete->CodeCompleteNaturalLanguage();
  setCodeCompletionReached();
}

/// getSpelling - This method is used to get the spelling of a token into a
/// SmallVector. Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
StringRef Preprocessor::getSpelling(const Token &Tok,
                                          SmallVectorImpl<char> &Buffer,
                                          bool *Invalid) const {
  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
  if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
    // Try the fast path.
    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
      return II->getName();
  }

  // Resize the buffer if we need to copy into it.
  if (Tok.needsCleaning())
    Buffer.resize(Tok.getLength());

  const char *Ptr = Buffer.data();
  unsigned Len = getSpelling(Tok, Ptr, Invalid);
  return StringRef(Ptr, Len);
}

/// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it.  If specified, the source location provides a source
/// location for the token.
void Preprocessor::CreateString(StringRef Str, Token &Tok,
                                SourceLocation ExpansionLocStart,
                                SourceLocation ExpansionLocEnd) {
  Tok.setLength(Str.size());

  const char *DestPtr;
  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);

  if (ExpansionLocStart.isValid())
    Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
                                       ExpansionLocEnd, Str.size());
  Tok.setLocation(Loc);

  // If this is a raw identifier or a literal token, set the pointer data.
  if (Tok.is(tok::raw_identifier))
    Tok.setRawIdentifierData(DestPtr);
  else if (Tok.isLiteral())
    Tok.setLiteralData(DestPtr);
}

/// Copy \p Length characters of the spelling at \p Loc into the scratch
/// buffer and create a token-split location covering them.  Returns an
/// invalid location if the spelling buffer cannot be read.
SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
  auto &SM = getSourceManager();
  SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
  bool Invalid = false;
  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
  if (Invalid)
    return SourceLocation();

  // FIXME: We could consider re-using spelling for tokens we see repeatedly.
  const char *DestPtr;
  SourceLocation Spelling =
      ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
  return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
}

/// Return the module currently being compiled, or null when not compiling
/// a module.
Module *Preprocessor::getCurrentModule() {
  if (!getLangOpts().isCompilingModule())
    return nullptr;

  return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
}

//===----------------------------------------------------------------------===//
// Preprocessor Initialization Methods
//===----------------------------------------------------------------------===//

/// EnterMainSourceFile - Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
void Preprocessor::EnterMainSourceFile() {
  // We do not allow the preprocessor to reenter the main file.  Doing so will
  // cause FileID's to accumulate information from both runs (e.g. #line
  // information) and predefined macros aren't guaranteed to be set properly.
  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
  FileID MainFileID = SourceMgr.getMainFileID();

  // If MainFileID is loaded it means we loaded an AST file, no need to enter
  // a main file.
  if (!SourceMgr.isLoadedFileID(MainFileID)) {
    // Enter the main file source buffer.
    EnterSourceFile(MainFileID, nullptr, SourceLocation());

    // If we've been asked to skip bytes in the main file (e.g., as part of a
    // precompiled preamble), do so now.
    if (SkipMainFilePreamble.first > 0)
      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
                              SkipMainFilePreamble.second);

    // Tell the header info that the main file was entered.  If the file is
    // later #imported, it won't be re-entered.
    if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
      HeaderInfo.IncrementIncludeCount(FE);
  }

  // Preprocess Predefines to populate the initial preprocessor state.
  std::unique_ptr<llvm::MemoryBuffer> SB =
    llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
  assert(SB && "Cannot create predefined source buffer");
  FileID FID = SourceMgr.createFileID(std::move(SB));
  assert(FID.isValid() && "Could not create FileID for predefines?");
  setPredefinesFileID(FID);

  // Start parsing the predefines.
  EnterSourceFile(FID, nullptr, SourceLocation());

  if (!PPOpts->PCHThroughHeader.empty()) {
    // Lookup and save the FileID for the through header. If it isn't found
    // in the search path, it's a fatal error.
    const DirectoryLookup *CurDir;
    Optional<FileEntryRef> File = LookupFile(
        SourceLocation(), PPOpts->PCHThroughHeader,
        /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
        /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
        /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
        /*IsFrameworkFound=*/nullptr);
    if (!File) {
      Diag(SourceLocation(), diag::err_pp_through_header_not_found)
          << PPOpts->PCHThroughHeader;
      return;
    }
    setPCHThroughHeaderFileID(
        SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
  }

  // Skip tokens from the Predefines and if needed the main file.
  if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
      (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
    SkipTokensWhileUsingPCH();
}

/// Record the FileID of the PCH through header (may be set only once).
void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
  assert(PCHThroughHeaderFileID.isInvalid() &&
         "PCHThroughHeaderFileID already set!");
  PCHThroughHeaderFileID = FID;
}

/// Return true if \p FE is the PCH through header file.
bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
  assert(PCHThroughHeaderFileID.isValid() &&
         "Invalid PCH through header FileID");
  return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
}

/// True when building a PCH (TU_Prefix) with a -pch-through-header.
bool Preprocessor::creatingPCHWithThroughHeader() {
  return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
         PCHThroughHeaderFileID.isValid();
}

/// True when consuming a PCH built with a -pch-through-header.
bool Preprocessor::usingPCHWithThroughHeader() {
  return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() &&
         PCHThroughHeaderFileID.isValid();
}

/// True when building a PCH (TU_Prefix) that ends at #pragma hdrstop.
bool Preprocessor::creatingPCHWithPragmaHdrStop() {
  return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop;
}

/// True when consuming a PCH built with #pragma hdrstop.
bool Preprocessor::usingPCHWithPragmaHdrStop() {
  return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop;
}

/// Skip tokens until after the #include of the through header or
/// until after a #pragma hdrstop is seen. Tokens in the predefines file
/// and the main file may be skipped. If the end of the predefines file
/// is reached, skipping continues into the main file. If the end of the
/// main file is reached, it's a fatal error.
+void Preprocessor::SkipTokensWhileUsingPCH() { +  bool ReachedMainFileEOF = false; +  bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; +  bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; +  Token Tok; +  while (true) { +    bool InPredefines = +        (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); +    switch (CurLexerKind) { +    case CLK_Lexer: +      CurLexer->Lex(Tok); +     break; +    case CLK_TokenLexer: +      CurTokenLexer->Lex(Tok); +      break; +    case CLK_CachingLexer: +      CachingLex(Tok); +      break; +    case CLK_LexAfterModuleImport: +      LexAfterModuleImport(Tok); +      break; +    } +    if (Tok.is(tok::eof) && !InPredefines) { +      ReachedMainFileEOF = true; +      break; +    } +    if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) +      break; +    if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) +      break; +  } +  if (ReachedMainFileEOF) { +    if (UsingPCHThroughHeader) +      Diag(SourceLocation(), diag::err_pp_through_header_not_seen) +          << PPOpts->PCHThroughHeader << 1; +    else if (!PPOpts->PCHWithHdrStopCreate) +      Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); +  } +} + +void Preprocessor::replayPreambleConditionalStack() { +  // Restore the conditional stack from the preamble, if there is one. 
+  if (PreambleConditionalStack.isReplaying()) { +    assert(CurPPLexer && +           "CurPPLexer is null when calling replayPreambleConditionalStack."); +    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); +    PreambleConditionalStack.doneReplaying(); +    if (PreambleConditionalStack.reachedEOFWhileSkipping()) +      SkipExcludedConditionalBlock( +          PreambleConditionalStack.SkipInfo->HashTokenLoc, +          PreambleConditionalStack.SkipInfo->IfTokenLoc, +          PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, +          PreambleConditionalStack.SkipInfo->FoundElse, +          PreambleConditionalStack.SkipInfo->ElseLoc); +  } +} + +void Preprocessor::EndSourceFile() { +  // Notify the client that we reached the end of the source file. +  if (Callbacks) +    Callbacks->EndOfMainFile(); +} + +//===----------------------------------------------------------------------===// +// Lexer Event Handling. +//===----------------------------------------------------------------------===// + +/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the +/// identifier information for the token and install it into the token, +/// updating the token kind accordingly. +IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { +  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); + +  // Look up this token, see if it is a macro, or if it is a language keyword. +  IdentifierInfo *II; +  if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { +    // No cleaning needed, just use the characters from the lexed buffer. +    II = getIdentifierInfo(Identifier.getRawIdentifier()); +  } else { +    // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 
+    SmallString<64> IdentifierBuffer; +    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); + +    if (Identifier.hasUCN()) { +      SmallString<64> UCNIdentifierBuffer; +      expandUCNs(UCNIdentifierBuffer, CleanedStr); +      II = getIdentifierInfo(UCNIdentifierBuffer); +    } else { +      II = getIdentifierInfo(CleanedStr); +    } +  } + +  // Update the token info (identifier info and appropriate token kind). +  Identifier.setIdentifierInfo(II); +  if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && +      getSourceManager().isInSystemHeader(Identifier.getLocation())) +    Identifier.setKind(tok::identifier); +  else +    Identifier.setKind(II->getTokenID()); + +  return II; +} + +void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { +  PoisonReasons[II] = DiagID; +} + +void Preprocessor::PoisonSEHIdentifiers(bool Poison) { +  assert(Ident__exception_code && Ident__exception_info); +  assert(Ident___exception_code && Ident___exception_info); +  Ident__exception_code->setIsPoisoned(Poison); +  Ident___exception_code->setIsPoisoned(Poison); +  Ident_GetExceptionCode->setIsPoisoned(Poison); +  Ident__exception_info->setIsPoisoned(Poison); +  Ident___exception_info->setIsPoisoned(Poison); +  Ident_GetExceptionInfo->setIsPoisoned(Poison); +  Ident__abnormal_termination->setIsPoisoned(Poison); +  Ident___abnormal_termination->setIsPoisoned(Poison); +  Ident_AbnormalTermination->setIsPoisoned(Poison); +} + +void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { +  assert(Identifier.getIdentifierInfo() && +         "Can't handle identifiers without identifier info!"); +  llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = +    PoisonReasons.find(Identifier.getIdentifierInfo()); +  if(it == PoisonReasons.end()) +    Diag(Identifier, diag::err_pp_used_poisoned_id); +  else +    Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); +} + +/// Returns a diagnostic message kind for 
reporting a future keyword as +/// appropriate for the identifier and specified language. +static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, +                                          const LangOptions &LangOpts) { +  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); + +  if (LangOpts.CPlusPlus) +    return llvm::StringSwitch<diag::kind>(II.getName()) +#define CXX11_KEYWORD(NAME, FLAGS)                                             \ +        .Case(#NAME, diag::warn_cxx11_keyword) +#define CXX2A_KEYWORD(NAME, FLAGS)                                             \ +        .Case(#NAME, diag::warn_cxx2a_keyword) +#include "clang/Basic/TokenKinds.def" +        ; + +  llvm_unreachable( +      "Keyword not known to come from a newer Standard or proposed Standard"); +} + +void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { +  assert(II.isOutOfDate() && "not out of date"); +  getExternalSource()->updateOutOfDateIdentifier(II); +} + +/// HandleIdentifier - This callback is invoked when the lexer reads an +/// identifier.  This callback looks up the identifier in the map and/or +/// potentially macro expands it or turns it into a named token (like 'for'). +/// +/// Note that callers of this method are guarded by checking the +/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the +/// IdentifierInfo methods that compute these properties will need to change to +/// match. +bool Preprocessor::HandleIdentifier(Token &Identifier) { +  assert(Identifier.getIdentifierInfo() && +         "Can't handle identifiers without identifier info!"); + +  IdentifierInfo &II = *Identifier.getIdentifierInfo(); + +  // If the information about this identifier is out of date, update it from +  // the external source. +  // We have to treat __VA_ARGS__ in a special way, since it gets +  // serialized with isPoisoned = true, but our preprocessor may have +  // unpoisoned it if we're defining a C99 macro. 
+  if (II.isOutOfDate()) { +    bool CurrentIsPoisoned = false; +    const bool IsSpecialVariadicMacro = +        &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; +    if (IsSpecialVariadicMacro) +      CurrentIsPoisoned = II.isPoisoned(); + +    updateOutOfDateIdentifier(II); +    Identifier.setKind(II.getTokenID()); + +    if (IsSpecialVariadicMacro) +      II.setIsPoisoned(CurrentIsPoisoned); +  } + +  // If this identifier was poisoned, and if it was not produced from a macro +  // expansion, emit an error. +  if (II.isPoisoned() && CurPPLexer) { +    HandlePoisonedIdentifier(Identifier); +  } + +  // If this is a macro to be expanded, do it. +  if (MacroDefinition MD = getMacroDefinition(&II)) { +    auto *MI = MD.getMacroInfo(); +    assert(MI && "macro definition with no macro info?"); +    if (!DisableMacroExpansion) { +      if (!Identifier.isExpandDisabled() && MI->isEnabled()) { +        // C99 6.10.3p10: If the preprocessing token immediately after the +        // macro name isn't a '(', this macro should not be expanded. +        if (!MI->isFunctionLike() || isNextPPTokenLParen()) +          return HandleMacroExpandedIdentifier(Identifier, MD); +      } else { +        // C99 6.10.3.4p2 says that a disabled macro may never again be +        // expanded, even if it's in a context where it could be expanded in the +        // future. +        Identifier.setFlag(Token::DisableExpand); +        if (MI->isObjectLike() || isNextPPTokenLParen()) +          Diag(Identifier, diag::pp_disabled_macro_expansion); +      } +    } +  } + +  // If this identifier is a keyword in a newer Standard or proposed Standard, +  // produce a warning. Don't warn if we're not considering macro expansion, +  // since this identifier might be the name of a macro. 
+  // FIXME: This warning is disabled in cases where it shouldn't be, like +  //   "#define constexpr constexpr", "int constexpr;" +  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { +    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) +        << II.getName(); +    // Don't diagnose this keyword again in this translation unit. +    II.setIsFutureCompatKeyword(false); +  } + +  // If this is an extension token, diagnose its use. +  // We avoid diagnosing tokens that originate from macro definitions. +  // FIXME: This warning is disabled in cases where it shouldn't be, +  // like "#define TY typeof", "TY(1) x". +  if (II.isExtensionToken() && !DisableMacroExpansion) +    Diag(Identifier, diag::ext_token_used); + +  // If this is the 'import' contextual keyword following an '@', note +  // that the next token indicates a module name. +  // +  // Note that we do not treat 'import' as a contextual +  // keyword when we're in a caching lexer, because caching lexers only get +  // used in contexts where import declarations are disallowed. +  // +  // Likewise if this is the C++ Modules TS import keyword. +  if (((LastTokenWasAt && II.isModulesImport()) || +       Identifier.is(tok::kw_import)) && +      !InMacroArgs && !DisableMacroExpansion && +      (getLangOpts().Modules || getLangOpts().DebuggerSupport) && +      CurLexerKind != CLK_CachingLexer) { +    ModuleImportLoc = Identifier.getLocation(); +    ModuleImportPath.clear(); +    ModuleImportExpectsIdentifier = true; +    CurLexerKind = CLK_LexAfterModuleImport; +  } +  return true; +} + +void Preprocessor::Lex(Token &Result) { +  ++LexLevel; + +  // We loop here until a lex function returns a token; this avoids recursion. 
+  bool ReturnedToken; +  do { +    switch (CurLexerKind) { +    case CLK_Lexer: +      ReturnedToken = CurLexer->Lex(Result); +      break; +    case CLK_TokenLexer: +      ReturnedToken = CurTokenLexer->Lex(Result); +      break; +    case CLK_CachingLexer: +      CachingLex(Result); +      ReturnedToken = true; +      break; +    case CLK_LexAfterModuleImport: +      ReturnedToken = LexAfterModuleImport(Result); +      break; +    } +  } while (!ReturnedToken); + +  if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { +    // Remember the identifier before code completion token. +    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); +    setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); +    // Set IdenfitierInfo to null to avoid confusing code that handles both +    // identifiers and completion tokens. +    Result.setIdentifierInfo(nullptr); +  } + +  // Update ImportSeqState to track our position within a C++20 import-seq +  // if this token is being produced as a result of phase 4 of translation. 
+  if (getLangOpts().CPlusPlusModules && LexLevel == 1 && +      !Result.getFlag(Token::IsReinjected)) { +    switch (Result.getKind()) { +    case tok::l_paren: case tok::l_square: case tok::l_brace: +      ImportSeqState.handleOpenBracket(); +      break; +    case tok::r_paren: case tok::r_square: +      ImportSeqState.handleCloseBracket(); +      break; +    case tok::r_brace: +      ImportSeqState.handleCloseBrace(); +      break; +    case tok::semi: +      ImportSeqState.handleSemi(); +      break; +    case tok::header_name: +    case tok::annot_header_unit: +      ImportSeqState.handleHeaderName(); +      break; +    case tok::kw_export: +      ImportSeqState.handleExport(); +      break; +    case tok::identifier: +      if (Result.getIdentifierInfo()->isModulesImport()) { +        ImportSeqState.handleImport(); +        if (ImportSeqState.afterImportSeq()) { +          ModuleImportLoc = Result.getLocation(); +          ModuleImportPath.clear(); +          ModuleImportExpectsIdentifier = true; +          CurLexerKind = CLK_LexAfterModuleImport; +        } +        break; +      } +      LLVM_FALLTHROUGH; +    default: +      ImportSeqState.handleMisc(); +      break; +    } +  } + +  LastTokenWasAt = Result.is(tok::at); +  --LexLevel; +  if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) +    OnToken(Result); +} + +/// Lex a header-name token (including one formed from header-name-tokens if +/// \p AllowConcatenation is \c true). +/// +/// \param FilenameTok Filled in with the next token. On success, this will +///        be either a header_name token. On failure, it will be whatever other +///        token was found instead. +/// \param AllowMacroExpansion If \c true, allow the header name to be formed +///        by macro expansion (concatenating tokens as necessary if the first +///        token is a '<'). 
+/// \return \c true if we reached EOD or EOF while looking for a > token in
+///         a concatenated header name and diagnosed it. \c false otherwise.
+bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
+  // Lex using header-name tokenization rules if tokens are being lexed from
+  // a file. Just grab a token normally if we're in a macro expansion.
+  if (CurPPLexer)
+    CurPPLexer->LexIncludeFilename(FilenameTok);
+  else
+    Lex(FilenameTok);
+
+  // This could be a <foo/bar.h> file coming from a macro expansion.  In this
+  // case, glue the tokens together into an angle_string_literal token.
+  SmallString<128> FilenameBuffer;
+  if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
+    // Remember the flags of the '<' so the synthesized header-name token can
+    // reproduce them.
+    bool StartOfLine = FilenameTok.isAtStartOfLine();
+    bool LeadingSpace = FilenameTok.hasLeadingSpace();
+    bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
+
+    SourceLocation Start = FilenameTok.getLocation();
+    SourceLocation End;
+    FilenameBuffer.push_back('<');
+
+    // Consume tokens until we find a '>'.
+    // FIXME: A header-name could be formed starting or ending with an
+    // alternative token. It's not clear whether that's ill-formed in all
+    // cases.
+    while (FilenameTok.isNot(tok::greater)) {
+      Lex(FilenameTok);
+      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
+        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
+        Diag(Start, diag::note_matching) << tok::less;
+        return true;
+      }
+
+      End = FilenameTok.getLocation();
+
+      // FIXME: Provide code completion for #includes.
+      if (FilenameTok.is(tok::code_completion)) {
+        setCodeCompletionReached();
+        Lex(FilenameTok);
+        continue;
+      }
+
+      // Append the spelling of this token to the buffer. If there was a space
+      // before it, add it now.
+      if (FilenameTok.hasLeadingSpace())
+        FilenameBuffer.push_back(' ');
+
+      // Get the spelling of the token, directly into FilenameBuffer if
+      // possible.
+      size_t PreAppendSize = FilenameBuffer.size();
+      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
+
+      const char *BufPtr = &FilenameBuffer[PreAppendSize];
+      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
+
+      // If the token was spelled somewhere else, copy it into FilenameBuffer.
+      if (BufPtr != &FilenameBuffer[PreAppendSize])
+        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
+
+      // Resize FilenameBuffer to the correct size.
+      if (FilenameTok.getLength() != ActualLen)
+        FilenameBuffer.resize(PreAppendSize + ActualLen);
+    }
+
+    // Synthesize a single header_name token covering '<' ... '>', carrying
+    // the flags saved from the original '<'.
+    FilenameTok.startToken();
+    FilenameTok.setKind(tok::header_name);
+    FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
+    FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
+    FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
+    CreateString(FilenameBuffer, FilenameTok, Start, End);
+  } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
+    // Convert a string-literal token of the form " h-char-sequence "
+    // (produced by macro expansion) into a header-name token.
+    //
+    // The rules for header-names don't quite match the rules for
+    // string-literals, but all the places where they differ result in
+    // undefined behavior, so we can and do treat them the same.
+    //
+    // A string-literal with a prefix or suffix is not translated into a
+    // header-name. This could theoretically be observable via the C++20
+    // context-sensitive header-name formation rules.
+    StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
+    if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
+      FilenameTok.setKind(tok::header_name);
+  }
+
+  return false;
+}
+
+/// Collect the tokens of a C++20 pp-import-suffix.
+///
+/// Lexes tokens into \p Toks until a ';' or EOF is seen at bracket depth
+/// zero; the terminating token (when present) is included in \p Toks.
+void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
+  // FIXME: For error recovery, consider recognizing attribute syntax here
+  // and terminating / diagnosing a missing semicolon if we find anything
+  // else? (Can we leave that to the parser?)
+  unsigned BracketDepth = 0;
+  while (true) {
+    Toks.emplace_back();
+    Lex(Toks.back());
+
+    switch (Toks.back().getKind()) {
+    case tok::l_paren: case tok::l_square: case tok::l_brace:
+      ++BracketDepth;
+      break;
+
+    case tok::r_paren: case tok::r_square: case tok::r_brace:
+      if (BracketDepth == 0)
+        return;
+      --BracketDepth;
+      break;
+
+    case tok::semi:
+      if (BracketDepth == 0)
+        return;
+    break;
+
+    case tok::eof:
+      return;
+
+    default:
+      break;
+    }
+  }
+}
+
+
+/// Lex a token following the 'import' contextual keyword.
+///
+///     pp-import: [C++20]
+///           import header-name pp-import-suffix[opt] ;
+///           import header-name-tokens pp-import-suffix[opt] ;
+/// [ObjC]    @ import module-name ;
+/// [Clang]   import module-name ;
+///
+///     header-name-tokens:
+///           string-literal
+///           < [any sequence of preprocessing-tokens other than >] >
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+///
+/// We respond to a pp-import by importing macros from the named module.
+///
+/// \return \c true if \p Result holds the next token to produce; \c false if
+///         the lexed tokens were instead injected into the token stream and
+///         the caller should lex again.
+bool Preprocessor::LexAfterModuleImport(Token &Result) {
+  // Figure out what kind of lexer we actually have.
+  recomputeCurLexerKind();
+
+  // Lex the next token. The header-name lexing rules are used at the start of
+  // a pp-import.
+  //
+  // For now, we only support header-name imports in C++20 mode.
+  // FIXME: Should we allow this in all language modes that support an import
+  // declaration as an extension?
+  if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
+    if (LexHeaderName(Result))
+      return true;
+  } else {
+    Lex(Result);
+  }
+
+  // Allocate a holding buffer for a sequence of tokens and introduce it into
+  // the token stream.
+  auto EnterTokens = [this](ArrayRef<Token> Toks) {
+    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+    EnterTokenStream(std::move(ToksCopy), Toks.size(),
+                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+  };
+
+  // Check for a header-name.
+  SmallVector<Token, 32> Suffix;
+  if (Result.is(tok::header_name)) {
+    // Enter the header-name token into the token stream; a Lex action cannot
+    // both return a token and cache tokens (doing so would corrupt the token
+    // cache if the call to Lex comes from CachingLex / PeekAhead).
+    Suffix.push_back(Result);
+
+    // Consume the pp-import-suffix and expand any macros in it now. We'll add
+    // it back into the token stream later.
+    CollectPpImportSuffix(Suffix);
+    if (Suffix.back().isNot(tok::semi)) {
+      // This is not a pp-import after all.
+      EnterTokens(Suffix);
+      return false;
+    }
+
+    // C++2a [cpp.module]p1:
+    //   The ';' preprocessing-token terminating a pp-import shall not have
+    //   been produced by macro replacement.
+    SourceLocation SemiLoc = Suffix.back().getLocation();
+    if (SemiLoc.isMacroID())
+      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+    // Reconstitute the import token.
+    Token ImportTok;
+    ImportTok.startToken();
+    ImportTok.setKind(tok::kw_import);
+    ImportTok.setLocation(ModuleImportLoc);
+    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+    ImportTok.setLength(6);
+
+    auto Action = HandleHeaderIncludeOrImport(
+        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
+    switch (Action.Kind) {
+    case ImportAction::None:
+      break;
+
+    case ImportAction::ModuleBegin:
+      // Let the parser know we're textually entering the module.
+      Suffix.emplace_back();
+      Suffix.back().startToken();
+      Suffix.back().setKind(tok::annot_module_begin);
+      Suffix.back().setLocation(SemiLoc);
+      Suffix.back().setAnnotationEndLoc(SemiLoc);
+      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+      LLVM_FALLTHROUGH;
+
+    case ImportAction::ModuleImport:
+    case ImportAction::SkippedModuleImport:
+      // We chose to import (or textually enter) the file. Convert the
+      // header-name token into a header unit annotation token.
+      Suffix[0].setKind(tok::annot_header_unit);
+      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+      // FIXME: Call the moduleImport callback?
+      break;
+    }
+
+    EnterTokens(Suffix);
+    return false;
+  }
+
+  // The token sequence
+  //
+  //   import identifier (. identifier)*
+  //
+  // indicates a module import directive. We already saw the 'import'
+  // contextual keyword, so now we're looking for the identifiers.
+  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
+    // We expected to see an identifier here, and we did; continue handling
+    // identifiers.
+    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
+                                              Result.getLocation()));
+    ModuleImportExpectsIdentifier = false;
+    CurLexerKind = CLK_LexAfterModuleImport;
+    return true;
+  }
+
+  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
+  // see the next identifier. (We can also see a '[[' that begins an
+  // attribute-specifier-seq here under the C++ Modules TS.)
+  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
+    ModuleImportExpectsIdentifier = true;
+    CurLexerKind = CLK_LexAfterModuleImport;
+    return true;
+  }
+
+  // If we didn't recognize a module name at all, this is not a (valid) import.
+  if (ModuleImportPath.empty() || Result.is(tok::eof))
+    return true;
+
+  // Consume the pp-import-suffix and expand any macros in it now, if we're not
+  // at the semicolon already.
+  SourceLocation SemiLoc = Result.getLocation();
+  if (Result.isNot(tok::semi)) {
+    Suffix.push_back(Result);
+    CollectPpImportSuffix(Suffix);
+    if (Suffix.back().isNot(tok::semi)) {
+      // This is not an import after all.
+      EnterTokens(Suffix);
+      return false;
+    }
+    SemiLoc = Suffix.back().getLocation();
+  }
+
+  // Under the Modules TS, the dot is just part of the module name, and not
+  // a real hierarchy separator. Flatten such module names now.
+  //
+  // FIXME: Is this the right level to be performing this transformation?
+  std::string FlatModuleName;
+  if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
+    for (auto &Piece : ModuleImportPath) {
+      if (!FlatModuleName.empty())
+        FlatModuleName += ".";
+      FlatModuleName += Piece.first->getName();
+    }
+    SourceLocation FirstPathLoc = ModuleImportPath[0].second;
+    ModuleImportPath.clear();
+    ModuleImportPath.push_back(
+        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+  }
+
+  Module *Imported = nullptr;
+  if (getLangOpts().Modules) {
+    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
+                                          ModuleImportPath,
+                                          Module::Hidden,
+                                          /*IsInclusionDirective=*/false);
+    if (Imported)
+      makeModuleVisible(Imported, SemiLoc);
+  }
+  if (Callbacks)
+    Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
+
+  if (!Suffix.empty()) {
+    EnterTokens(Suffix);
+    return false;
+  }
+  return true;
+}
+
+void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
+  CurSubmoduleState->VisibleModules.setVisible(
+      M, Loc, [](Module *) {},
+      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
+        // FIXME: Include the path in the diagnostic.
+        // FIXME: Include the import location for the conflicting module.
+        Diag(ModuleImportLoc, diag::warn_module_conflict)
+            << Path[0]->getFullModuleName()
+            << Conflict->getFullModuleName()
+            << Message;
+      });
+
+  // Add this module to the imports list of the currently-built submodule.
+  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
+    BuildingSubmoduleStack.back().M->Imports.insert(M);
+}
+
+/// Lex a (possibly concatenated) sequence of ordinary string-literal tokens,
+/// starting from the already-lexed token in \p Result, and store the
+/// concatenated value in \p String. Returns false (after diagnosing) if the
+/// first token is not a string literal or the literal is malformed.
+bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
+                                          const char *DiagnosticTag,
+                                          bool AllowMacroExpansion) {
+  // We need at least one string literal.
+  if (Result.isNot(tok::string_literal)) {
+    Diag(Result, diag::err_expected_string_literal)
+      << /*Source='in...'*/0 << DiagnosticTag;
+    return false;
+  }
+
+  // Lex string literal tokens, optionally with macro expansion.
+  SmallVector<Token, 4> StrToks;
+  do {
+    StrToks.push_back(Result);
+
+    if (Result.hasUDSuffix())
+      Diag(Result, diag::err_invalid_string_udl);
+
+    if (AllowMacroExpansion)
+      Lex(Result);
+    else
+      LexUnexpandedToken(Result);
+  } while (Result.is(tok::string_literal));
+
+  // Concatenate and parse the strings.
+  StringLiteralParser Literal(StrToks, *this);
+  assert(Literal.isAscii() && "Didn't allow wide strings in");
+
+  if (Literal.hadError)
+    return false;
+
+  if (Literal.Pascal) {
+    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
+      << /*Source='in...'*/0 << DiagnosticTag;
+    return false;
+  }
+
+  String = Literal.GetString();
+  return true;
+}
+
+/// Parse the numeric-constant token \p Tok as a plain integer literal
+/// (no error, no UD-suffix). On success, stores the (64-bit truncated)
+/// value in \p Value, consumes the token by lexing the next one into
+/// \p Tok, and returns true.
+bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
+  assert(Tok.is(tok::numeric_constant));
+  SmallString<8> IntegerBuffer;
+  bool NumberInvalid = false;
+  StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
+  if (NumberInvalid)
+    return false;
+  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
+  if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
+    return false;
+  llvm::APInt APVal(64, 0);
+  // GetIntegerValue returns true on overflow of the 64-bit value.
+  if (Literal.GetIntegerValue(APVal))
+    return false;
+  Lex(Tok);
+  Value = APVal.getLimitedValue();
+  return true;
+}
+
+void Preprocessor::addCommentHandler(CommentHandler *Handler) {
+  assert(Handler && "NULL comment handler");
+  assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
+         "Comment handler already registered");
+  CommentHandlers.push_back(Handler);
+}
+
+void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
+  std::vector<CommentHandler *>::iterator Pos =
+      llvm::find(CommentHandlers, Handler);
+  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
+  CommentHandlers.erase(Pos);
+}
+
+/// Run every registered comment handler on \p Comment. Returns true (with the
+/// next token lexed into \p result) if some handler produced tokens and
+/// comments are not being retained; otherwise returns false.
+bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
+  bool AnyPendingTokens = false;
+  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
+       HEnd = CommentHandlers.end();
+       H != HEnd; ++H) {
+    if ((*H)->HandleComment(*this, Comment))
+      AnyPendingTokens = true;
+  }
+  if (!AnyPendingTokens || getCommentRetentionState())
+    return false;
+  Lex(result);
+  return true;
+}
+
+ModuleLoader::~ModuleLoader() = default;
+
+CommentHandler::~CommentHandler() = default;
+
+CodeCompletionHandler::~CodeCompletionHandler() = default;
+
+/// Lazily create the PreprocessingRecord and register it as a PPCallbacks
+/// (which takes ownership); a no-op if one already exists.
+void Preprocessor::createPreprocessingRecord() {
+  if (Record)
+    return;
+
+  Record = new PreprocessingRecord(getSourceManager());
+  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
+}
diff --git a/clang/lib/Lex/PreprocessorLexer.cpp b/clang/lib/Lex/PreprocessorLexer.cpp
new file mode 100644
index 000000000000..5f6f4a13419b
--- /dev/null
+++ b/clang/lib/Lex/PreprocessorLexer.cpp
@@ -0,0 +1,52 @@
+//===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//  This file implements the PreprocessorLexer and Token interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/PreprocessorLexer.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/LexDiagnostic.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Token.h" +#include <cassert> + +using namespace clang; + +void PreprocessorLexer::anchor() {} + +PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) +    : PP(pp), FID(fid) { +  if (pp) +    InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size(); +} + +/// After the preprocessor has parsed a \#include, lex and +/// (potentially) macro expand the filename. +void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { +  assert(ParsingFilename == false && "reentered LexIncludeFilename"); + +  // We are now parsing a filename! +  ParsingFilename = true; + +  // Lex the filename. +  if (LexingRawMode) +    IndirectLex(FilenameTok); +  else +    PP->Lex(FilenameTok); + +  // We should have obtained the filename now. +  ParsingFilename = false; +} + +/// getFileEntry - Return the FileEntry corresponding to this FileID.  Like +/// getFileID(), this only works for lexers with attached preprocessors. +const FileEntry *PreprocessorLexer::getFileEntry() const { +  return PP->getSourceManager().getFileEntryForID(getFileID()); +} diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp new file mode 100644 index 000000000000..19ab93ec54b4 --- /dev/null +++ b/clang/lib/Lex/ScratchBuffer.cpp @@ -0,0 +1,83 @@ +//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the ScratchBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+using namespace clang;
+
+// ScratchBufSize - The size of each chunk of scratch memory.  Slightly less
+// than a page, almost certainly enough for anything. :)
+static const unsigned ScratchBufSize = 4060;
+
+ScratchBuffer::ScratchBuffer(SourceManager &SM)
+    : SourceMgr(SM), CurBuffer(nullptr) {
+  // Set BytesUsed so that the first call to getToken will require an alloc.
+  BytesUsed = ScratchBufSize;
+}
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token.  This is just like the
+/// method below, but returns a location that indicates the physloc of the
+/// token.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       const char *&DestPtr) {
+  // The +2 accounts for the '\n' written before the token and the '\0'
+  // written after it below.
+  if (BytesUsed+Len+2 > ScratchBufSize)
+    AllocScratchBuffer(Len+2);
+  else {
+    // Clear out the source line cache if it's already been computed.
+    // FIXME: Allow this to be incrementally extended.
+    auto *ContentCache = const_cast<SrcMgr::ContentCache *>(
+        SourceMgr.getSLocEntry(SourceMgr.getFileID(BufferStartLoc))
+                 .getFile().getContentCache());
+    ContentCache->SourceLineCache = nullptr;
+  }
+
+  // Prefix the token with a \n, so that it looks like it is the first thing on
+  // its own virtual line in caret diagnostics.
+  CurBuffer[BytesUsed++] = '\n';
+
+  // Return a pointer to the character data.
+  DestPtr = CurBuffer+BytesUsed;
+
+  // Copy the token data into the buffer.
+  memcpy(CurBuffer+BytesUsed, Buf, Len);
+
+  // Remember that we used these bytes.
+  BytesUsed += Len+1;
+
+  // Add a NUL terminator to the token.  This keeps the tokens separated, in
+  // case they get relexed, and puts them on their own virtual lines in case a
+  // diagnostic points to one.
+  CurBuffer[BytesUsed-1] = '\0';
+
+  return BufferStartLoc.getLocWithOffset(BytesUsed-Len-1);
+}
+
+void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
+  // Only pay attention to the requested length if it is larger than our default
+  // page size.  If it is, we allocate an entire chunk for it.  This is to
+  // support gigantic tokens, which almost certainly won't happen. :)
+  if (RequestLen < ScratchBufSize)
+    RequestLen = ScratchBufSize;
+
+  // Get scratch buffer. Zero-initialize it so it can be dumped into a PCH file
+  // deterministically.
+  std::unique_ptr<llvm::WritableMemoryBuffer> OwnBuf =
+      llvm::WritableMemoryBuffer::getNewMemBuffer(RequestLen,
+                                                  "<scratch space>");
+  CurBuffer = OwnBuf->getBufferStart();
+  // The SourceManager takes ownership of the buffer; we keep only the raw
+  // write pointer and the FileID's start location.
+  FileID FID = SourceMgr.createFileID(std::move(OwnBuf));
+  BufferStartLoc = SourceMgr.getLocForStartOfFile(FID);
+  BytesUsed = 0;
+}
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
new file mode 100644
index 000000000000..e626cfcc927f
--- /dev/null
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -0,0 +1,297 @@
+//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenConcatenation class.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/TokenConcatenation.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/ErrorHandling.h" +using namespace clang; + + +/// IsStringPrefix - Return true if Str is a string prefix. +/// 'L', 'u', 'U', or 'u8'. Including raw versions. +static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) { + +  if (Str[0] == 'L' || +      (CPlusPlus11 && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) { + +    if (Str.size() == 1) +      return true; // "L", "u", "U", and "R" + +    // Check for raw flavors. Need to make sure the first character wasn't +    // already R. Need CPlusPlus11 check for "LR". +    if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11) +      return true; // "LR", "uR", "UR" + +    // Check for "u8" and "u8R" +    if (Str[0] == 'u' && Str[1] == '8') { +      if (Str.size() == 2) return true; // "u8" +      if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R" +    } +  } + +  return false; +} + +/// IsIdentifierStringPrefix - Return true if the spelling of the token +/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions. 
+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
+  const LangOptions &LangOpts = PP.getLangOpts();
+
+  if (!Tok.needsCleaning()) {
+    // Fast path: the spelling is the raw source text; all prefixes are
+    // 1 to 3 characters long.
+    if (Tok.getLength() < 1 || Tok.getLength() > 3)
+      return false;
+    SourceManager &SM = PP.getSourceManager();
+    const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+    return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
+                          LangOpts.CPlusPlus11);
+  }
+
+  // The token needs cleaning: spell it into a stack buffer when it fits,
+  // otherwise fall back to a heap-allocated spelling.
+  if (Tok.getLength() < 256) {
+    char Buffer[256];
+    const char *TokPtr = Buffer;
+    unsigned length = PP.getSpelling(Tok, TokPtr);
+    return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11);
+  }
+
+  return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11);
+}
+
+TokenConcatenation::TokenConcatenation(const Preprocessor &pp) : PP(pp) {
+  memset(TokenInfo, 0, sizeof(TokenInfo));
+
+  // These tokens have custom code in AvoidConcat.
+  TokenInfo[tok::identifier      ] |= aci_custom;
+  TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
+  TokenInfo[tok::period          ] |= aci_custom_firstchar;
+  TokenInfo[tok::amp             ] |= aci_custom_firstchar;
+  TokenInfo[tok::plus            ] |= aci_custom_firstchar;
+  TokenInfo[tok::minus           ] |= aci_custom_firstchar;
+  TokenInfo[tok::slash           ] |= aci_custom_firstchar;
+  TokenInfo[tok::less            ] |= aci_custom_firstchar;
+  TokenInfo[tok::greater         ] |= aci_custom_firstchar;
+  TokenInfo[tok::pipe            ] |= aci_custom_firstchar;
+  TokenInfo[tok::percent         ] |= aci_custom_firstchar;
+  TokenInfo[tok::colon           ] |= aci_custom_firstchar;
+  TokenInfo[tok::hash            ] |= aci_custom_firstchar;
+  TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
+
+  // These tokens have custom code in C++11 mode.
+  if (PP.getLangOpts().CPlusPlus11) {
+    TokenInfo[tok::string_literal      ] |= aci_custom;
+    TokenInfo[tok::wide_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf16_string_literal] |= aci_custom;
+    TokenInfo[tok::utf32_string_literal] |= aci_custom;
+    TokenInfo[tok::char_constant       ] |= aci_custom;
+    TokenInfo[tok::wide_char_constant  ] |= aci_custom;
+    TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+    TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+  }
+
+  // These tokens have custom code in C++17 mode.
+  if (PP.getLangOpts().CPlusPlus17)
+    TokenInfo[tok::utf8_char_constant] |= aci_custom;
+
+  // These tokens have custom code in C++2a mode.
+  if (PP.getLangOpts().CPlusPlus2a)
+    TokenInfo[tok::lessequal ] |= aci_custom_firstchar;
+
+  // These tokens change behavior if followed by an '='.
+  TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
+  TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
+  TokenInfo[tok::minus       ] |= aci_avoid_equal;           // -=
+  TokenInfo[tok::slash       ] |= aci_avoid_equal;           // /=
+  TokenInfo[tok::less        ] |= aci_avoid_equal;           // <=
+  TokenInfo[tok::greater     ] |= aci_avoid_equal;           // >=
+  TokenInfo[tok::pipe        ] |= aci_avoid_equal;           // |=
+  TokenInfo[tok::percent     ] |= aci_avoid_equal;           // %=
+  TokenInfo[tok::star        ] |= aci_avoid_equal;           // *=
+  TokenInfo[tok::exclaim     ] |= aci_avoid_equal;           // !=
+  TokenInfo[tok::lessless    ] |= aci_avoid_equal;           // <<=
+  TokenInfo[tok::greatergreater] |= aci_avoid_equal;         // >>=
+  TokenInfo[tok::caret       ] |= aci_avoid_equal;           // ^=
+  TokenInfo[tok::equal       ] |= aci_avoid_equal;           // ==
+}
+
+/// GetFirstChar - Get the first character of the token \arg Tok,
+/// avoiding calls to getSpelling where possible.
+static char GetFirstChar(const Preprocessor &PP, const Token &Tok) {
+  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // Avoid spelling identifiers, the most common form of token.
+    return II->getNameStart()[0];
+  } else if (!Tok.needsCleaning()) {
+    if (Tok.isLiteral() && Tok.getLiteralData()) {
+      return *Tok.getLiteralData();
+    } else {
+      SourceManager &SM = PP.getSourceManager();
+      return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+    }
+  } else if (Tok.getLength() < 256) {
+    // Token needs cleaning: spell it into a stack buffer.
+    char Buffer[256];
+    const char *TokPtr = Buffer;
+    PP.getSpelling(Tok, TokPtr);
+    return TokPtr[0];
+  } else {
+    return PP.getSpelling(Tok)[0];
+  }
+}
+
+/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
+/// the two individual tokens to be lexed as a single token, return true
+/// (which causes a space to be printed between them).  This allows the output
+/// of -E mode to be lexed to the same token stream as lexing the input
+/// directly would.
+///
+/// This code must conservatively return true if it doesn't want to be 100%
+/// accurate.  This will cause the output to include extra space characters,
+/// but the resulting output won't have incorrect concatenations going on.
+/// Examples include "..", which we print with a space between, because we
+/// don't want to track enough to tell "x.." from "...".
+bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
+                                     const Token &PrevTok,
+                                     const Token &Tok) const {
+  // Conservatively assume that every annotation token that has a printable
+  // form requires whitespace.
+  if (PrevTok.isAnnotation())
+    return true;
+
+  // First, check to see if the tokens were directly adjacent in the original
+  // source.  If they were, it must be okay to stick them together: if there
+  // were an issue, the tokens would have been lexed differently.
+  SourceManager &SM = PP.getSourceManager();
+  SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation());
+  SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation());
+  if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc)
+    return false;
+
+  tok::TokenKind PrevKind = PrevTok.getKind();
+  if (!PrevTok.isAnnotation() && PrevTok.getIdentifierInfo())
+    PrevKind = tok::identifier; // Language keyword or named operator.
+
+  // Look up information on when we should avoid concatenation with prevtok.
+  unsigned ConcatInfo = TokenInfo[PrevKind];
+
+  // If prevtok never causes a problem for anything after it, return quickly.
+  if (ConcatInfo == 0) return false;
+
+  if (ConcatInfo & aci_avoid_equal) {
+    // If the next token is '=' or '==', avoid concatenation.
+    if (Tok.isOneOf(tok::equal, tok::equalequal))
+      return true;
+    ConcatInfo &= ~aci_avoid_equal;
+  }
+  if (Tok.isAnnotation()) {
+    // Modules annotation can show up when generated automatically for includes.
+    assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
+                       tok::annot_module_end) &&
+           "unexpected annotation in AvoidConcat");
+    ConcatInfo = 0;
+  }
+
+  if (ConcatInfo == 0)
+    return false;
+
+  // Basic algorithm: we look at the first character of the second token, and
+  // determine whether it, if appended to the first token, would form (or
+  // would contribute) to a larger token if concatenated.
+  char FirstChar = 0;
+  if (ConcatInfo & aci_custom) {
+    // If the token does not need to know the first character, don't get it.
+  } else {
+    FirstChar = GetFirstChar(PP, Tok);
+  }
+
+  switch (PrevKind) {
+  default:
+    llvm_unreachable("InitAvoidConcatTokenInfo built wrong");
+
+  case tok::raw_identifier:
+    llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
+
+  case tok::string_literal:
+  case tok::wide_string_literal:
+  case tok::utf8_string_literal:
+  case tok::utf16_string_literal:
+  case tok::utf32_string_literal:
+  case tok::char_constant:
+  case tok::wide_char_constant:
+  case tok::utf8_char_constant:
+  case tok::utf16_char_constant:
+  case tok::utf32_char_constant:
+    if (!PP.getLangOpts().CPlusPlus11)
+      return false;
+
+    // In C++11, a string or character literal followed by an identifier is a
+    // single token.
+    if (Tok.getIdentifierInfo())
+      return true;
+
+    // A ud-suffix is an identifier. If the previous token ends with one, treat
+    // it as an identifier.
+    if (!PrevTok.hasUDSuffix())
+      return false;
+    LLVM_FALLTHROUGH;
+  case tok::identifier:   // id+id or id+number or id+L"foo".
+    // id+'.'... will not append.
+    if (Tok.is(tok::numeric_constant))
+      return GetFirstChar(PP, Tok) != '.';
+
+    if (Tok.getIdentifierInfo() ||
+        Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal,
+                    tok::utf16_string_literal, tok::utf32_string_literal,
+                    tok::wide_char_constant, tok::utf8_char_constant,
+                    tok::utf16_char_constant, tok::utf32_char_constant))
+      return true;
+
+    // If this isn't identifier + string, we're done.
+    if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
+      return false;
+
+    // Otherwise, this is a narrow character or string.  If the *identifier*
+    // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
+    return IsIdentifierStringPrefix(PrevTok);
+
+  case tok::numeric_constant:
+    return isPreprocessingNumberBody(FirstChar) ||
+           FirstChar == '+' || FirstChar == '-';
+  case tok::period:          // ..., .*, .1234
+    return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
+           isDigit(FirstChar) ||
+           (PP.getLangOpts().CPlusPlus && FirstChar == '*');
+  case tok::amp:             // &&
+    return FirstChar == '&';
+  case tok::plus:            // ++
+    return FirstChar == '+';
+  case tok::minus:           // --, ->, ->*
+    return FirstChar == '-' || FirstChar == '>';
+  case tok::slash:           // /*, //
+    return FirstChar == '*' || FirstChar == '/';
+  case tok::less:            // <<, <<=, <:, <%
+    return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
+  case tok::greater:         // >>, >>=
+    return FirstChar == '>';
+  case tok::pipe:            // ||
+    return FirstChar == '|';
+  case tok::percent:         // %>, %:
+    return FirstChar == '>' || FirstChar == ':';
+  case tok::colon:           // ::, :>
+    return FirstChar == '>' ||
+    (PP.getLangOpts().CPlusPlus && FirstChar == ':');
+  case tok::hash:            // ##, #@, %:%:
+    return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
+  case tok::arrow:           // ->*
+    return PP.getLangOpts().CPlusPlus && FirstChar == '*';
+  case tok::lessequal:       // <=> (C++2a)
+    return PP.getLangOpts().CPlusPlus2a && FirstChar == '>';
+  }
+}
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
new file mode 100644
index 000000000000..da5681aaf478
--- /dev/null
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -0,0 +1,1079 @@
+//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the TokenLexer interface.
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/TokenLexer.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/VariadicMacroSupport.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include <cassert>
#include <cstring>

using namespace clang;

/// Create a TokenLexer for the specified macro with the specified actual
/// arguments.  Note that this ctor takes ownership of the ActualArgs pointer
/// (it is released via ActualArgs->destroy() in destroy()).
void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
                      MacroArgs *Actuals) {
  // If the client is reusing a TokenLexer, make sure to free any memory
  // associated with it.
  destroy();

  Macro = MI;
  ActualArgs = Actuals;
  CurTokenIdx = 0;

  // Record where the expansion occurred and the whitespace/start-of-line
  // properties of the macro name token so they can be transferred to the
  // first expanded token later in Lex().
  ExpandLocStart = Tok.getLocation();
  ExpandLocEnd = ELEnd;
  AtStartOfLine = Tok.isAtStartOfLine();
  HasLeadingSpace = Tok.hasLeadingSpace();
  NextTokGetsSpace = false;
  Tokens = &*Macro->tokens_begin();
  OwnsTokens = false;
  DisableMacroExpansion = false;
  IsReinject = false;
  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
  MacroExpansionStart = SourceLocation();

  SourceManager &SM = PP.getSourceManager();
  MacroStartSLocOffset = SM.getNextLocalOffset();

  if (NumTokens > 0) {
    assert(Tokens[0].getLocation().isValid());
    assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
           "Macro defined in macro?");
    assert(ExpandLocStart.isValid());

    // Reserve a source location entry chunk for the length of the macro
    // definition. Tokens that get lexed directly from the definition will
    // have their locations pointing inside this chunk. This is to avoid
    // creating separate source location entries for each token.
    MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
    MacroDefLength = Macro->getDefinitionLength(SM);
    MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
                                                ExpandLocStart,
                                                ExpandLocEnd,
                                                MacroDefLength);
  }

  // If this is a function-like macro, expand the arguments and change
  // Tokens to point to the expanded tokens.
  if (Macro->isFunctionLike() && Macro->getNumParams())
    ExpandFunctionArguments();

  // Mark the macro as currently disabled, so that it is not recursively
  // expanded.  The macro must be disabled only after argument pre-expansion of
  // function-like macro arguments occurs.
  Macro->DisableMacro();
}

/// Create a TokenLexer for the specified token stream.  This does not
/// take ownership of the specified token vector unless \p ownsTokens is set.
void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
                      bool disableMacroExpansion, bool ownsTokens,
                      bool isReinject) {
  // Reinjected tokens must never be re-expanded.
  assert(!isReinject || disableMacroExpansion);
  // If the client is reusing a TokenLexer, make sure to free any memory
  // associated with it.
  destroy();

  Macro = nullptr;
  ActualArgs = nullptr;
  Tokens = TokArray;
  OwnsTokens = ownsTokens;
  DisableMacroExpansion = disableMacroExpansion;
  IsReinject = isReinject;
  NumTokens = NumToks;
  CurTokenIdx = 0;
  ExpandLocStart = ExpandLocEnd = SourceLocation();
  AtStartOfLine = false;
  HasLeadingSpace = false;
  NextTokGetsSpace = false;
  MacroExpansionStart = SourceLocation();

  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
  // returned unmodified.
  if (NumToks != 0) {
    AtStartOfLine   = TokArray[0].isAtStartOfLine();
    HasLeadingSpace = TokArray[0].hasLeadingSpace();
  }
}

/// Release any resources this TokenLexer owns: the expanded token array
/// (when OwnsTokens is set) and the macro's actual arguments.
void TokenLexer::destroy() {
  // If this was a function-like macro that actually uses its arguments, delete
  // the expanded tokens.
  if (OwnsTokens) {
    delete [] Tokens;
    Tokens = nullptr;
    OwnsTokens = false;
  }

  // TokenLexer owns its formal arguments.
  if (ActualArgs) ActualArgs->destroy(PP);
}

/// Remove the comma before __VA_ARGS__ when __VA_ARGS__ is empty, for the
/// GNU ", ## __VA_ARGS__" extension and for Microsoft-compatibility mode.
/// \returns true if the comma was removed (the argument is then dropped by
/// the caller), false if nothing was changed.
bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
    SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
    unsigned MacroArgNo, Preprocessor &PP) {
  // Is the macro argument __VA_ARGS__?
  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
    return false;

  // In Microsoft-compatibility mode, a comma is removed in the expansion
  // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty.  This extension is
  // not supported by gcc.
  if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
    return false;

  // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
  // named arguments, where it remains.  In all other modes, including C99
  // with GNU extensions, it is removed regardless of named arguments.
  // Microsoft also appears to support this extension, unofficially.
  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
        && Macro->getNumParams() < 2)
    return false;

  // Is a comma available to be removed?
  if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
    return false;

  // Issue an extension diagnostic for the paste operator.
  if (HasPasteOperator)
    PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);

  // Remove the comma.
  ResultToks.pop_back();

  if (!ResultToks.empty()) {
    // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
    // then removal of the comma should produce a placemarker token (in C99
    // terms) which we model by popping off the previous ##, giving us a plain
    // "X" when __VA_ARGS__ is empty.
    if (ResultToks.back().is(tok::hashhash))
      ResultToks.pop_back();

    // Remember that this comma was elided.
    ResultToks.back().setFlag(Token::CommaAfterElided);
  }

  // Never add a space, even if the comma, ##, or arg had a space.
  NextTokGetsSpace = false;
  return true;
}

/// Stringify the tokens that were expanded from a '#__VA_OPT__(...)' use:
/// perform any pending ## concatenation within the __VA_OPT__ contents,
/// stringify (or charify) the result, and replace the expanded tokens in
/// \p ResultToks with the single stringified token.
void TokenLexer::stringifyVAOPTContents(
    SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
    const SourceLocation VAOPTClosingParenLoc) {
  const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
  const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
  Token *const VAOPTTokens =
      NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;

  SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
  // FIXME: Should we keep track within VCtx that we did or did not
  // encounter pasting - and only then perform this loop.

  // Perform token pasting (concatenation) prior to stringization.
  for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
       ++CurTokenIdx) {
    if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
      assert(CurTokenIdx != 0 &&
             "Can not have __VAOPT__ contents begin with a ##");
      Token &LHS = VAOPTTokens[CurTokenIdx - 1];
      // pasteTokens advances CurTokenIdx past the pasted RHS token(s).
      pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens),
                  CurTokenIdx);
      // Replace the token prior to the first ## in this iteration.
      ConcatenatedVAOPTResultToks.back() = LHS;
      if (CurTokenIdx == NumVAOptTokens)
        break;
    }
    ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
  }

  // StringifyArgument expects an eof-terminated token sequence.
  ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
  // Get the SourceLocation that represents the start location within
  // the macro definition that marks where this string is substituted
  // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
  // macro definition, and use it to indicate that the stringified token
  // was generated from that location.
  const SourceLocation ExpansionLocStartWithinMacro =
      getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
  const SourceLocation ExpansionLocEndWithinMacro =
      getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);

  Token StringifiedVAOPT = MacroArgs::StringifyArgument(
      &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
      ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);

  if (VCtx.getLeadingSpaceForStringifiedToken())
    StringifiedVAOPT.setFlag(Token::LeadingSpace);

  StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
  // Resize (shrink) the token stream to just capture this stringified token.
  ResultToks.resize(NumToksPriorToVAOpt + 1);
  ResultToks.back() = StringifiedVAOPT;
}

/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.  Handles #/#@ stringification,
/// ## pasting placemarkers, __VA_OPT__, and the GNU/MSVC comma-elision
/// extensions for empty __VA_ARGS__.
void TokenLexer::ExpandFunctionArguments() {
  SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks.  Keep
  // track of whether we change anything.  If not, no need to keep them.  If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  Optional<bool> CalledWithVariadicArguments;

  VAOptExpansionContext VCtx(PP);

  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
    const Token &CurTok = Tokens[I];
    // We don't want a space for the next token after a paste
    // operator.  In valid code, the token will get smooshed onto the
    // preceding one anyway. In assembler-with-cpp mode, invalid
    // pastes are allowed through: in this case, we do not want the
    // extra whitespace to be added.  For example, we want ". ## foo"
    // -> ".foo" not ". foo".
    if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
      NextTokGetsSpace = true;

    if (VCtx.isVAOptToken(CurTok)) {
      MadeChange = true;
      assert(Tokens[I + 1].is(tok::l_paren) &&
             "__VA_OPT__ must be followed by '('");

      ++I;             // Skip the l_paren
      VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
                                           ResultToks.size());

      continue;
    }

    // We have entered into the __VA_OPT__ context, so handle tokens
    // appropriately.
    if (VCtx.isInVAOpt()) {
      // If we are about to process a token that is either an argument to
      // __VA_OPT__ or its closing rparen, then:
      //  1) If the token is the closing rparen that exits us out of __VA_OPT__,
      //  perform any necessary stringification or placemarker processing,
      //  and/or skip to the next token.
      //  2) else if macro was invoked without variadic arguments skip this
      //  token.
      //  3) else (macro was invoked with variadic arguments) process the token
      //  normally.

      if (Tokens[I].is(tok::l_paren))
        VCtx.sawOpeningParen(Tokens[I].getLocation());
      // Continue skipping tokens within __VA_OPT__ if the macro was not
      // called with variadic arguments, else let the rest of the loop handle
      // this token. Note sawClosingParen() returns true only if the r_paren
      // matches the closing r_paren of the __VA_OPT__.
      if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
        // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
        if (!CalledWithVariadicArguments.hasValue()) {
          CalledWithVariadicArguments =
              ActualArgs->invokedWithVariadicArgument(Macro, PP);
        }
        if (!*CalledWithVariadicArguments) {
          // Skip this token.
          continue;
        }
        // ... else the macro was called with variadic arguments, and we do not
        // have a closing rparen - so process this token normally.
      } else {
        // Current token is the closing r_paren which marks the end of the
        // __VA_OPT__ invocation, so handle any place-marker pasting (if
        // empty) by removing hashhash either before (if exists) or after. And
        // also stringify the entire contents if VAOPT was preceded by a hash,
        // but do so only after any token concatenation that needs to occur
        // within the contents of VAOPT.

        if (VCtx.hasStringifyOrCharifyBefore()) {
          // Replace all the tokens just added from within VAOPT into a single
          // stringified token. This requires token-pasting to eagerly occur
          // within these tokens. If either the contents of VAOPT were empty
          // or the macro wasn't called with any variadic arguments, the result
          // is a token that represents an empty string.
          stringifyVAOPTContents(ResultToks, VCtx,
                                 /*ClosingParenLoc*/ Tokens[I].getLocation());

        } else if (/*No tokens within VAOPT*/
                   ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
          // Treat VAOPT as a placemarker token.  Eat either the '##' before the
          // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
          // hashhash was not a placemarker) or the '##'
          // after VAOPT, but not both.

          if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
            ResultToks.pop_back();
          } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
            ++I; // Skip the following hashhash.
          }
        } else {
          // If there's a ## before the __VA_OPT__, we might have discovered
          // that the __VA_OPT__ begins with a placeholder. We delay action on
          // that to now to avoid messing up our stashed count of tokens before
          // __VA_OPT__.
          if (VCtx.beginsWithPlaceholder()) {
            assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 &&
                   ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
                   ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is(
                       tok::hashhash) &&
                   "no token paste before __VA_OPT__");
            ResultToks.erase(ResultToks.begin() +
                             VCtx.getNumberOfTokensPriorToVAOpt() - 1);
          }
          // If the expansion of __VA_OPT__ ends with a placeholder, eat any
          // following '##' token.
          if (VCtx.endsWithPlaceholder() && I + 1 != E &&
              Tokens[I + 1].is(tok::hashhash)) {
            ++I;
          }
        }
        VCtx.reset();
        // We processed __VA_OPT__'s closing paren (and the exit out of
        // __VA_OPT__), so skip to the next token.
        continue;
      }
    }

    // If we found the stringify operator, get the argument stringified.  The
    // preprocessor already verified that the following token is a macro
    // parameter or __VA_OPT__ when the #define was lexed.

    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
      int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
      assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
             "Token following # is not an argument or __VA_OPT__!");

      if (ArgNo == -1) {
        // Handle the __VA_OPT__ case.
        VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
                                   CurTok.is(tok::hashat));
        continue;
      }
      // Else handle the simple argument case.
      SourceLocation ExpansionLocStart =
          getExpansionLocForMacroDefLoc(CurTok.getLocation());
      SourceLocation ExpansionLocEnd =
          getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());

      // #@ is the Microsoft charize extension; # is standard stringize.
      bool Charify = CurTok.is(tok::hashat);
      const Token *UnexpArg = ActualArgs->getUnexpArgument(ArgNo);
      Token Res = MacroArgs::StringifyArgument(
          UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
      Res.setFlag(Token::StringifiedInMacro);

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++I;  // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Find out if there is a paste (##) operator before or after the token.
    bool NonEmptyPasteBefore =
      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
    bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
    bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren);

    assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
           "unexpected ## in ResultToks");

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getParameterNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      } else if (PasteBefore && !NonEmptyPasteBefore)
        ResultToks.back().clearFlag(Token::LeadingSpace);

      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument.

    // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
    // are no trailing commas if __VA_ARGS__ is empty.
    if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
        MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                     /*HasPasteOperator=*/false,
                                     Macro, ArgNo, PP))
      continue;

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result.  This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it.  This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
      else
        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.
      if (ResultArgToks->isNot(tok::eof)) {
        size_t FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // In Microsoft-compatibility mode, we follow MSVC's preprocessing
        // behavior by not considering single commas from nested macro
        // expansions as argument separators. Set a flag on the token so we can
        // test for this later when the macro expansion is processed.
        if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
            ResultToks.back().is(tok::comma))
          ResultToks.back().setFlag(Token::IgnoredComma);

        // If the '##' came from expanding an argument, turn it into 'unknown'
        // to avoid pasting.
        for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
                                           ResultToks.end())) {
          if (Tok.is(tok::hashhash))
            Tok.setKind(tok::unknown);
        }

        if (ExpandLocStart.isValid()) {
          updateLocForMacroArgTokens(CurTok.getLocation(),
                                     ResultToks.begin()+FirstResult,
                                     ResultToks.end());
        }

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             NextTokGetsSpace);
        ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
        NextTokGetsSpace = false;
      } else {
        // We're creating a placeholder token. Usually this doesn't matter,
        // but it can affect paste behavior when at the start or end of a
        // __VA_OPT__.
        if (NonEmptyPasteBefore) {
          // We're imagining a placeholder token is inserted here. If this is
          // the first token in a __VA_OPT__ after a ##, delete the ##.
          assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
          VCtx.hasPlaceholderAfterHashhashAtStart();
        }
        if (RParenAfter)
          VCtx.hasPlaceholderBeforeRParen();
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument.  It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) {  // Not an empty argument?
      bool VaArgsPseudoPaste = false;
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
      // the expander tries to paste ',' with the first token of the __VA_ARGS__
      // expansion.
      if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumParams()-1 &&
          Macro->isVariadic()) {
        VaArgsPseudoPaste = true;
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If the '##' came from expanding an argument, turn it into 'unknown'
      // to avoid pasting.
      for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
                                         ResultToks.end())) {
        if (Tok.is(tok::hashhash))
          Tok.setKind(tok::unknown);
      }

      if (ExpandLocStart.isValid()) {
        updateLocForMacroArgTokens(CurTok.getLocation(),
                                   ResultToks.end()-NumToks, ResultToks.end());
      }

      // Transfer the leading whitespace information from the token
      // (the macro argument) onto the first token of the
      // expansion. Note that we don't do this for the GNU
      // pseudo-paste extension ", ## __VA_ARGS__".
      if (!VaArgsPseudoPaste) {
        ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
                                                             false);
        ResultToks[ResultToks.size() - NumToks].setFlagValue(
            Token::LeadingSpace, NextTokGetsSpace);
      }

      NextTokGetsSpace = false;
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      ++I;
      continue;
    }

    if (RParenAfter)
      VCtx.hasPlaceholderBeforeRParen();

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list, unless the LHS was empty too.
    // Remove it.
    assert(PasteBefore);
    if (NonEmptyPasteBefore) {
      assert(ResultToks.back().is(tok::hashhash));
      // Do not remove the paste operator if it is the one before __VA_OPT__
      // (and we are still processing tokens within VA_OPT).  We handle the case
      // of removing the paste operator if __VA_OPT__ reduces to the notional
      // placemarker above when we encounter the closing paren of VA_OPT.
      if (!VCtx.isInVAOpt() ||
          ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
        ResultToks.pop_back();
      else
        VCtx.hasPlaceholderAfterHashhashAtStart();
    }

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token before
    // the ## was a comma, remove the comma.  This is a GCC extension which is
    // disabled when using -std=c99.
    if (ActualArgs->isVarargsElidedUse())
      MaybeRemoveCommaBeforeVaArgs(ResultToks,
                                   /*HasPasteOperator=*/true,
                                   Macro, ArgNo, PP);
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    assert(!OwnsTokens && "This would leak if we already own the token list");
    // This is deleted in the dtor.
    NumTokens = ResultToks.size();
    // The tokens will be added to Preprocessor's cache and will be removed
    // when this TokenLexer finishes lexing them.
    Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);

    // The preprocessor cache of macro expanded tokens owns these tokens,
    // not us.
    OwnsTokens = false;
  }
}

/// Checks if two tokens form a wide string literal, i.e. an identifier 'L'
/// followed by a literal that was produced by macro stringification.  Used
/// for the MSVC 'L#macro_arg' compatibility extension.
static bool isWideStringLiteralFromMacro(const Token &FirstTok,
                                         const Token &SecondTok) {
  return FirstTok.is(tok::identifier) &&
         FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
         SecondTok.stringifiedInMacro();
}

/// Lex - Lex and return a token from this macro stream.
bool TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    Tok.startToken();
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
    if (CurTokenIdx == 0)
      Tok.setFlag(Token::LeadingEmptyMacro);
    return PP.HandleEndOfTokenLexer(Tok);
  }

  SourceManager &SM = PP.getSourceManager();

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurTokenIdx == 0;

  // Get the next token to return.
  Tok = Tokens[CurTokenIdx++];
  if (IsReinject)
    Tok.setFlag(Token::IsReinjected);

  bool TokenIsFromPaste = false;

  // If this token is followed by a token paste (##) operator, paste the tokens!
  // Note that ## is a normal token when not expanding a macro.
  if (!isAtEnd() && Macro &&
      (Tokens[CurTokenIdx].is(tok::hashhash) ||
       // Special processing of L#x macros in -fms-compatibility mode.
       // Microsoft compiler is able to form a wide string literal from
       // 'L#macro_arg' construct in a function-like macro.
       (PP.getLangOpts().MSVCCompat &&
        isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
    // When handling the microsoft /##/ extension, the final token is
    // returned by pasteTokens, not the pasted token.
    if (pasteTokens(Tok))
      return true;

    TokenIsFromPaste = true;
  }

  // The token's current location indicates where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // ExpansionLoc.  Pull this information together into a new SourceLocation
  // that captures all of this.
  if (ExpandLocStart.isValid() &&   // Don't do this for token streams.
      // Check that the token's location was not already set properly.
      SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
    SourceLocation instLoc;
    if (Tok.is(tok::comment)) {
      instLoc = SM.createExpansionLoc(Tok.getLocation(),
                                      ExpandLocStart,
                                      ExpandLocEnd,
                                      Tok.getLength());
    } else {
      instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
    }

    Tok.setLocation(instLoc);
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  } else {
    // If this is not the first token, we may still need to pass through
    // leading whitespace if we've expanded a macro.
    if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
    if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
  }
  AtStartOfLine = false;
  HasLeadingSpace = false;

  // Handle recursive expansion!
  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    IdentifierInfo *II = Tok.getIdentifierInfo();
    Tok.setKind(II->getTokenID());

    // If this identifier was poisoned and from a paste, emit an error.  This
    // won't be handled by Preprocessor::HandleIdentifier because this is coming
    // from a macro expansion.
    if (II->isPoisoned() && TokenIsFromPaste) {
      PP.HandlePoisonedIdentifier(Tok);
    }

    if (!DisableMacroExpansion && II->isHandleIdentifierCase())
      return PP.HandleIdentifier(Tok);
  }

  // Otherwise, return a normal token.
  return true;
}

/// Convenience overload: paste starting at the current position within this
/// TokenLexer's own token stream.
bool TokenLexer::pasteTokens(Token &Tok) {
  return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
}

/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
/// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
/// are more ## after it, chomp them iteratively.  Return the result as LHSTok.
/// If this returns true, the caller should immediately return the token.
bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
                             unsigned int &CurIdx) {
  assert(CurIdx > 0 && "## can not be the first token within tokens");
  assert((TokenStream[CurIdx].is(tok::hashhash) ||
         (PP.getLangOpts().MSVCCompat &&
          isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
             "Token at this Index must be ## or part of the MSVC 'L "
             "#macro-arg' pasting pair");

  // MSVC: If previous token was pasted, this must be a recovery from an invalid
  // paste operation. Ignore spaces before this token to mimic MSVC output.
  // Required for generating valid UUID strings in some MS headers.
  if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
      TokenStream[CurIdx - 2].is(tok::hashhash))
    LHSTok.clearFlag(Token::LeadingSpace);

  SmallString<128> Buffer;
  const char *ResultTokStrPtr = nullptr;
  // Remember where the whole paste expression begins so the final token can be
  // given an expansion range covering the entire "a ## b ## c" sequence.
  SourceLocation StartLoc = LHSTok.getLocation();
  SourceLocation PasteOpLoc;

  // True once every token in the stream has been consumed.
  auto IsAtEnd = [&TokenStream, &CurIdx] {
    return TokenStream.size() == CurIdx;
  };

  do {
    // Consume the ## operator if any.
    // (In the MSVC 'L#macro-arg' case there is no '##' token, so nothing is
    // consumed here and PasteOpLoc points at the RHS instead.)
    PasteOpLoc = TokenStream[CurIdx].getLocation();
    if (TokenStream[CurIdx].is(tok::hashhash))
      ++CurIdx;
    assert(!IsAtEnd() && "No token on the RHS of a paste operator!");

    // Get the RHS token.
    const Token &RHS = TokenStream[CurIdx];

    // Allocate space for the result token.  This is guaranteed to be enough for
    // the two tokens.
    Buffer.resize(LHSTok.getLength() + RHS.getLength());

    // Get the spelling of the LHS token in Buffer.
    const char *BufPtr = &Buffer[0];
    bool Invalid = false;
    unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
    if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
      memcpy(&Buffer[0], BufPtr, LHSLen);
    if (Invalid)
      return true;

    // Append the spelling of the RHS token directly after the LHS spelling.
    BufPtr = Buffer.data() + LHSLen;
    unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
    if (Invalid)
      return true;
    if (RHSLen && BufPtr != &Buffer[LHSLen])
      // Really, we want the chars in Buffer!
      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);

    // Trim excess space.
    Buffer.resize(LHSLen+RHSLen);

    // Plop the pasted result (including the trailing newline and null) into a
    // scratch buffer where we can lex it.
    Token ResultTokTmp;
    ResultTokTmp.startToken();

    // Claim that the tmp token is a string_literal so that we can get the
    // character pointer back from CreateString in getLiteralData().
    ResultTokTmp.setKind(tok::string_literal);
    PP.CreateString(Buffer, ResultTokTmp);
    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
    ResultTokStrPtr = ResultTokTmp.getLiteralData();

    // Lex the resultant pasted token into Result.
    Token Result;

    if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
      // Common paste case: identifier+identifier = identifier.  Avoid creating
      // a lexer and other overhead.
      PP.IncrementPasteCounter(true);
      Result.startToken();
      Result.setKind(tok::raw_identifier);
      Result.setRawIdentifierData(ResultTokStrPtr);
      Result.setLocation(ResultTokLoc);
      Result.setLength(LHSLen+RHSLen);
    } else {
      PP.IncrementPasteCounter(false);

      assert(ResultTokLoc.isFileID() &&
             "Should be a raw location into scratch buffer");
      SourceManager &SourceMgr = PP.getSourceManager();
      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);

      bool Invalid = false;
      const char *ScratchBufStart
        = SourceMgr.getBufferData(LocFileID, &Invalid).data();
      // NOTE(review): an invalid scratch buffer returns false (success) here,
      // while the getSpelling failures above return true — confirm this
      // asymmetry is intentional.
      if (Invalid)
        return false;

      // Make a lexer to lex this string from.  Lex just this one token.
      // Make a lexer object so that we lex and expand the paste result.
      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
               PP.getLangOpts(), ScratchBufStart,
               ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);

      // Lex a token in raw mode.  This way it won't look up identifiers
      // automatically, lexing off the end will return an eof token, and
      // warnings are disabled.  This returns true if the result token is the
      // entire buffer.
      bool isInvalid = !TL.LexFromRawLexer(Result);

      // If we got an EOF token, we didn't form even ONE token.  For example, we
      // did "/ ## /" to get "//".
      isInvalid |= Result.is(tok::eof);

      // If pasting the two tokens didn't form a full new token, this is an
      // error.  This occurs with "x ## +"  and other stuff.  Return with LHSTok
      // unmodified and with RHS as the next token to lex.
      if (isInvalid) {
        // Explicitly convert the token location to have proper expansion
        // information so that the user knows where it came from.
        SourceManager &SM = PP.getSourceManager();
        SourceLocation Loc =
          SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);

        // Test for the Microsoft extension of /##/ turning into // here on the
        // error path.
        if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
            RHS.is(tok::slash)) {
          HandleMicrosoftCommentPaste(LHSTok, Loc);
          return true;
        }

        // Do not emit the error when preprocessing assembler code.
        if (!PP.getLangOpts().AsmPreprocessor) {
          // If we're in microsoft extensions mode, downgrade this from a hard
          // error to an extension that defaults to an error.  This allows
          // disabling it.
          PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
                                                     : diag::err_pp_bad_paste)
              << Buffer;
        }

        // An error has occurred so exit loop.
        break;
      }

      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
      // operator.
      if (Result.is(tok::hashhash))
        Result.setKind(tok::unknown);
    }

    // Transfer properties of the LHS over the Result.
    Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
    Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());

    // Finally, replace LHS with the result, consume the RHS, and iterate.
    ++CurIdx;
    LHSTok = Result;
  } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));

  // Location of the last token consumed (either the last RHS or the token
  // before the error that broke the loop).
  SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();

  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if the token was
  // expanded from the full ## expression. Pull this information together into
  // a new SourceLocation that captures all of this.
  SourceManager &SM = PP.getSourceManager();
  if (StartLoc.isFileID())
    StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
  if (EndLoc.isFileID())
    EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
  // Walk up the expansion chain until both endpoints land in the FileID of
  // this macro expansion, so the range is expressed in one consistent space.
  FileID MacroFID = SM.getFileID(MacroExpansionStart);
  while (SM.getFileID(StartLoc) != MacroFID)
    StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
  while (SM.getFileID(EndLoc) != MacroFID)
    EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();

  LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
                                        LHSTok.getLength()));

  // Now that we got the result token, it will be subject to expansion.  Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up.  As such, if the result is an identifier, look up id info.
  if (LHSTok.is(tok::raw_identifier)) {
    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    PP.LookUpIdentifierInfo(LHSTok);
  }
  return false;
}

/// isNextTokenLParen - If the next token lexed will pop this macro off the
/// expansion stack, return 2.  If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned TokenLexer::isNextTokenLParen() const {
  // Out of tokens?
  if (isAtEnd())
    return 2;
  // bool implicitly widens to 1 (is l_paren) or 0 (is not).
  return Tokens[CurTokenIdx].is(tok::l_paren);
}

/// isParsingPreprocessorDirective - Return true if we are in the middle of a
/// preprocessor directive (the token stream ends in an end-of-directive token
/// and we have not yet consumed it).
bool TokenLexer::isParsingPreprocessorDirective() const {
  return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
}

/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
/// together to form a comment that comments out everything in the current
/// macro, other active macros, and anything left on the current physical
/// source line of the expanded buffer.  Handle this by returning the
/// first token on the next line.
void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
  // This is a Microsoft extension; diagnose it as such.
  PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);

  // We 'comment out' the rest of this macro by just ignoring the rest of the
  // tokens that have not been lexed yet, if any.

  // Since this must be a macro, mark the macro enabled now that it is no longer
  // being expanded.
  assert(Macro && "Token streams can't paste comments");
  Macro->EnableMacro();

  PP.HandleMicrosoftCommentPaste(Tok);
}

/// If \arg loc is a file ID and points inside the current macro
/// definition, returns the appropriate source location pointing at the
/// macro expansion source location entry, otherwise it returns an invalid
/// SourceLocation.
SourceLocation
TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
  assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
         "Not appropriate for token streams");
  assert(loc.isValid() && loc.isFileID());

  SourceManager &SM = PP.getSourceManager();
  assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
         "Expected loc to come from the macro definition");

  // The second call computes loc's offset relative to the macro definition
  // start; the same offset inside the expansion entry is the answer.
  unsigned relativeOffset = 0;
  SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
  return MacroExpansionStart.getLocWithOffset(relativeOffset);
}

/// Finds the tokens that are consecutive (from the same FileID)
/// creates a single SLocEntry, and assigns SourceLocations to each token that
/// point to that SLocEntry. e.g for
///   assert(foo == bar);
/// There will be a single SLocEntry for the "foo == bar" chunk and locations
/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
///
/// \arg begin_tokens will be updated to a position past all the found
/// consecutive tokens.
static void updateConsecutiveMacroArgTokens(SourceManager &SM,
                                            SourceLocation InstLoc,
                                            Token *&begin_tokens,
                                            Token * end_tokens) {
  assert(begin_tokens < end_tokens);

  SourceLocation FirstLoc = begin_tokens->getLocation();
  SourceLocation CurLoc = FirstLoc;

  // Compare the source location offset of tokens and group together tokens that
  // are close, even if their locations point to different FileIDs. e.g.
  //
  //  |bar    |  foo | cake   |  (3 tokens from 3 consecutive FileIDs)
  //  ^                    ^
  //  |bar       foo   cake|     (one SLocEntry chunk for all tokens)
  //
  // we can perform this "merge" since the token's spelling location depends
  // on the relative offset.

  Token *NextTok = begin_tokens + 1;
  for (; NextTok < end_tokens; ++NextTok) {
    SourceLocation NextLoc = NextTok->getLocation();
    if (CurLoc.isFileID() != NextLoc.isFileID())
      break; // Token from different kind of FileID.

    int RelOffs;
    if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
      break; // Token from different local/loaded location.
    // Check that token is not before the previous token or more than 50
    // "characters" away.
    if (RelOffs < 0 || RelOffs > 50)
      break;

    if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
      break; // Token from a different macro.

    CurLoc = NextLoc;
  }

  // For the consecutive tokens, find the length of the SLocEntry to contain
  // all of them.
  Token &LastConsecutiveTok = *(NextTok-1);
  int LastRelOffs = 0;
  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
                           &LastRelOffs);
  unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();

  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
  SourceLocation Expansion =
      SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);

  // Change the location of the tokens from the spelling location to the new
  // expanded location.
  for (; begin_tokens < NextTok; ++begin_tokens) {
    Token &Tok = *begin_tokens;
    int RelOffs = 0;
    SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
    Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
  }
}

/// Creates SLocEntries and updates the locations of macro argument
/// tokens to their new expanded locations.
///
/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
/// definition.
void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
                                            Token *begin_tokens,
                                            Token *end_tokens) {
  SourceManager &SM = PP.getSourceManager();

  // Expansion location of the argument id inside the macro definition; every
  // argument token's new location points at this spot.
  SourceLocation InstLoc =
      getExpansionLocForMacroDefLoc(ArgIdSpellLoc);

  // updateConsecutiveMacroArgTokens advances begin_tokens past the chunk it
  // handled each iteration, so this loop terminates.
  while (begin_tokens < end_tokens) {
    // If there's only one token just create a SLocEntry for it.
    if (end_tokens - begin_tokens == 1) {
      Token &Tok = *begin_tokens;
      Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
                                                    InstLoc,
                                                    Tok.getLength()));
      return;
    }

    updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
  }
}

/// Record the start-of-line and leading-space flags of \p Result as the
/// pending lexical state to apply to the next token this lexer returns.
void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
  AtStartOfLine = Result.isAtStartOfLine();
  HasLeadingSpace = Result.hasLeadingSpace();
}
diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h
new file mode 100644
index 000000000000..74dd57fdf118
--- /dev/null
+++ b/clang/lib/Lex/UnicodeCharSets.h
@@ -0,0 +1,407 @@
//===--- UnicodeCharSets.h - Contains important sets of characters --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H
#define LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H

#include "llvm/Support/UnicodeCharRanges.h"

// Code points (inclusive [lo, hi] pairs) permitted in identifiers.
// NOTE(review): llvm::sys::UnicodeCharRanges consumers appear to expect the
// ranges sorted by lower bound and non-overlapping — keep that order when
// editing. TODO confirm against UnicodeCharRanges.h.

// C11 D.1, C++11 [charname.allowed]
// The numbered groups below mirror the numbered rows of the standard's table.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = {
  // 1
  { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
  { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
  { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
  { 0x00F8, 0x00FF },
  // 2
  { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF },
  // 3
  { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 },
  { 0x2054, 0x2054 }, { 0x2060, 0x206F },
  // 4
  { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
  { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF },
  // 5
  { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F },
  // 6
  { 0x3040, 0xD7FF },
  // 7
  { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
  { 0xFE47, 0xFFFD },
  // 8 (supplementary planes, excluding the FFFE/FFFF noncharacters)
  { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
  { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD },
  { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
  { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD },
  { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
};

// C++03 [extendid]
// Note that this is not the same as C++98, but we don't distinguish C++98
// and C++03 in Clang.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[] = {
  // Latin
  { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 },
  { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },

  // Greek
  { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
  { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
  { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
  { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },

  // Cyrillic
  { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C },
  { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
  { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
  { 0x04F8, 0x04F9 },

  // Armenian
  { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

  // Hebrew
  { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 },

  // Arabic
  { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 },
  { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 },

  // Devanagari
  { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

  // Bengali
  { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
  { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
  { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },

  // Gurmukhi
  { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
  { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
  { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },

  // Gujarati
  { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 },
  { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
  { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 },

  // Oriya
  { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 },
  { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 },
  { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },

  // Tamil
  { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
  { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
  { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
  { 0x0BB7, 0x0BB9 },

  // Telugu
  { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
  { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },

  // Kannada
  { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
  { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 },

  // Malayalam
  { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
  { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },

  // Thai
  { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 },
  { 0x0E4F, 0x0E5B },

  // Lao
  { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 },
  { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
  { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
  { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA },
  { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 },
  { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 },
  { 0x0EC6, 0x0EC6 },

  // Georgian
  { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },

  // Hangul
  { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 },

  // Latin (2)
  { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 },

  // Greek (2)
  { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
  { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
  { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
  { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 },
  { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
  { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },

  // Hiragana
  { 0x3041, 0x3094 }, { 0x309B, 0x309E },

  // Katakana
  { 0x30A1, 0x30FE },

  // Bopomofo ("Bopmofo" in the standard's table)
  { 0x3105, 0x312C },

  // CJK Unified Ideographs
  { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 },
  { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
  { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F },
  { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
  { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC },
  { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE },
  { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 },
  { 0xFFDA, 0xFFDC }
};

// C99 Annex D
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[] = {
  // Latin (1)
  { 0x00AA, 0x00AA },

  // Special characters (1)
  { 0x00B5, 0x00B5 }, { 0x00B7, 0x00B7 },

  // Latin (2)
  { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
  { 0x00F8, 0x01F5 }, { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },

  // Special characters (2)
  { 0x02B0, 0x02B8 }, { 0x02BB, 0x02BB }, { 0x02BD, 0x02C1 },
  { 0x02D0, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x037A, 0x037A },

  // Greek (1)
  { 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
  { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
  { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
  { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },

  // Cyrillic
  { 0x0401, 0x040C }, { 0x040E, 0x044F }, { 0x0451, 0x045C },
  { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
  { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
  { 0x04F8, 0x04F9 },

  // Armenian (1)
  { 0x0531, 0x0556 },

  // Special characters (3)
  { 0x0559, 0x0559 },

  // Armenian (2)
  { 0x0561, 0x0587 },

  // Hebrew
  { 0x05B0, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF },
  { 0x05C1, 0x05C2 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },

  // Arabic (1)
  { 0x0621, 0x063A }, { 0x0640, 0x0652 },

  // Digits (1)
  { 0x0660, 0x0669 },

  // Arabic (2)
  { 0x0670, 0x06B7 }, { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE },
  { 0x06D0, 0x06DC }, { 0x06E5, 0x06E8 }, { 0x06EA, 0x06ED },

  // Digits (2)
  { 0x06F0, 0x06F9 },

  // Devanagari and Special character 0x093D.
  { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093D, 0x094D },
  { 0x0950, 0x0952 }, { 0x0958, 0x0963 },

  // Digits (3)
  { 0x0966, 0x096F },

  // Bengali (1)
  { 0x0981, 0x0983 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 },
  { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 },
  { 0x09B6, 0x09B9 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
  { 0x09CB, 0x09CD }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 },

  // Digits (4)
  { 0x09E6, 0x09EF },

  // Bengali (2)
  { 0x09F0, 0x09F1 },

  // Gurmukhi (1)
  { 0x0A02, 0x0A02 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
  { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
  { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3E, 0x0A42 },
  { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A59, 0x0A5C },
  { 0x0A5E, 0x0A5E },

  // Digits (5)
  { 0x0A66, 0x0A6F },

  // Gurmukhi (2)
  { 0x0A74, 0x0A74 },

  // Gujarati
  { 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D },
  { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 },
  { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0ABD, 0x0AC5 },
  { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 },
  { 0x0AE0, 0x0AE0 },

  // Digits (6)
  { 0x0AE6, 0x0AEF },

  // Oriya and Special character 0x0B3D
  { 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
  { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
  { 0x0B36, 0x0B39 }, { 0x0B3D, 0x0B43 }, { 0x0B47, 0x0B48 },
  { 0x0B4B, 0x0B4D }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },

  // Digits (7)
  { 0x0B66, 0x0B6F },

  // Tamil
  { 0x0B82, 0x0B83 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 },
  { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C },
  { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA },
  { 0x0BAE, 0x0BB5 }, { 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 },
  { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },

  // Digits (8)
  { 0x0BE7, 0x0BEF },

  // Telugu
  { 0x0C01, 0x0C03 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 },
  { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 },
  { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
  { 0x0C60, 0x0C61 },

  // Digits (9)
  { 0x0C66, 0x0C6F },

  // Kannada
  { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 },
  { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 },
  { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
  { 0x0CDE, 0x0CDE }, { 0x0CE0, 0x0CE1 },

  // Digits (10)
  { 0x0CE6, 0x0CEF },

  // Malayalam
  { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 },
  { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 }, { 0x0D3E, 0x0D43 },
  { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D60, 0x0D61 },

  // Digits (11)
  { 0x0D66, 0x0D6F },

  // Thai...including Digits { 0x0E50, 0x0E59 }
  { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E5B },

  // Lao (1)
  { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
  { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
  { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
  { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EAE },
  { 0x0EB0, 0x0EB9 }, { 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 },
  { 0x0EC6, 0x0EC6 }, { 0x0EC8, 0x0ECD },

  // Digits (12)
  { 0x0ED0, 0x0ED9 },

  // Lao (2)
  { 0x0EDC, 0x0EDD },

  // Tibetan (1)
  { 0x0F00, 0x0F00 }, { 0x0F18, 0x0F19 },

  // Digits (13)
  { 0x0F20, 0x0F33 },

  // Tibetan (2)
  { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
  { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F69 }, { 0x0F71, 0x0F84 },
  { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 },
  { 0x0F99, 0x0FAD }, { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 },

  // Georgian
  { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },

  // Latin (3)
  { 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 },

  // Greek (2)
  { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
  { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
  { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
  { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC },

  // Special characters (4)
  { 0x1FBE, 0x1FBE },

  // Greek (3)
  { 0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 },
  { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 },
  { 0x1FF6, 0x1FFC },

  // Special characters (5)
  { 0x203F, 0x2040 },

  // Latin (4)
  { 0x207F, 0x207F },

  // Special characters (6)
  { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 },
  { 0x2115, 0x2115 }, { 0x2118, 0x211D }, { 0x2124, 0x2124 },
  { 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x2131 },
  { 0x2133, 0x2138 }, { 0x2160, 0x2182 }, { 0x3005, 0x3007 },
  { 0x3021, 0x3029 },

  // Hiragana
  { 0x3041, 0x3093 }, { 0x309B, 0x309C },

  // Katakana
  { 0x30A1, 0x30F6 }, { 0x30FB, 0x30FC },

  // Bopomofo ("Bopmofo" in the standard's table)
  { 0x3105, 0x312C },

  // CJK Unified Ideographs
  { 0x4E00, 0x9FA5 },

  // Hangul,
  { 0xAC00, 0xD7A3 }
};

// C11 D.2, C++11 [charname.disallowed]
// Combining characters that may not begin an identifier.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[] = {
  { 0x0300, 0x036F }, { 0x1DC0, 0x1DFF }, { 0x20D0, 0x20FF },
  { 0xFE20, 0xFE2F }
};

// C99 6.4.2.1p3: The initial character [of an identifier] shall not be a
// universal character name designating a digit.
// C99 Annex D defines these characters as "Digits".
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[] = {
  { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x0966, 0x096F },
  { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF },
  { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF }, { 0x0C66, 0x0C6F },
  { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0E50, 0x0E59 },
  { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F33 }
};

// Unicode v6.2, chapter 6.2, table 6-2.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[] = {
  { 0x0085, 0x0085 }, { 0x00A0, 0x00A0 }, { 0x1680, 0x1680 },
  { 0x180E, 0x180E }, { 0x2000, 0x200A }, { 0x2028, 0x2029 },
  { 0x202F, 0x202F }, { 0x205F, 0x205F }, { 0x3000, 0x3000 }
};

#endif
