| field | value |
|---|---|
| author | Dimitry Andric <dim@FreeBSD.org>, 2019-08-20 20:50:49 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org>, 2019-08-20 20:50:49 +0000 |
| commit | 2298981669bf3bd63335a4be179bc0f96823a8f4 |
| tree | 1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/Lex |
| parent | 9a83721404652cea39e9f02ae3e3b5c964602a5c |
Diffstat (limited to 'lib/Lex')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 763 |
| -rw-r--r-- | lib/Lex/HeaderMap.cpp | 7 |
| -rw-r--r-- | lib/Lex/HeaderSearch.cpp | 111 |
| -rw-r--r-- | lib/Lex/Lexer.cpp | 16 |
| -rw-r--r-- | lib/Lex/LiteralSupport.cpp | 19 |
| -rw-r--r-- | lib/Lex/MacroArgs.cpp | 18 |
| -rw-r--r-- | lib/Lex/MacroInfo.cpp | 7 |
| -rw-r--r-- | lib/Lex/ModuleMap.cpp | 23 |
| -rw-r--r-- | lib/Lex/PPCaching.cpp | 59 |
| -rw-r--r-- | lib/Lex/PPCallbacks.cpp | 7 |
| -rw-r--r-- | lib/Lex/PPConditionalDirectiveRecord.cpp | 17 |
| -rw-r--r-- | lib/Lex/PPDirectives.cpp | 562 |
| -rw-r--r-- | lib/Lex/PPExpressions.cpp | 27 |
| -rw-r--r-- | lib/Lex/PPLexerChange.cpp | 34 |
| -rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 112 |
| -rw-r--r-- | lib/Lex/Pragma.cpp | 338 |
| -rw-r--r-- | lib/Lex/PreprocessingRecord.cpp | 25 |
| -rw-r--r-- | lib/Lex/Preprocessor.cpp | 412 |
| -rw-r--r-- | lib/Lex/PreprocessorLexer.cpp | 15 |
| -rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 7 |
| -rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 12 |
| -rw-r--r-- | lib/Lex/TokenLexer.cpp | 67 |
| -rw-r--r-- | lib/Lex/UnicodeCharSets.h | 7 |
23 files changed, 1969 insertions(+), 696 deletions(-)
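The bulk of this import is the new dependency-directives source minimizer added in `lib/Lex/DependencyDirectivesSourceMinimizer.cpp`. As a quick orientation before the diff itself, here is a minimal sketch of how its public entry point, `clang::minimizeSourceToDependencyDirectives` (defined at the end of the new file below), might be driven. The wrapper function, buffer handling, and namespace alias are illustrative assumptions; only the entry point, the `Token` type, and the "returns true on error" convention are taken from the diff.

```cpp
// A minimal sketch (not part of the commit): drive the new minimizer over an
// in-memory buffer. The wrapper name and setup here are hypothetical; the
// entry point, Token type, and error convention come from the diff below.
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

static bool minimizeForDependencyScan(llvm::StringRef Source,
                                      llvm::SmallVectorImpl<char> &Minimized) {
  namespace dep = clang::minimize_source_to_dependency_directives;
  // Directive tokens recorded alongside the minimized text (offsets into it).
  llvm::SmallVector<dep::Token, 32> Tokens;
  // Diagnostics are optional; passing nullptr suppresses error reporting.
  return clang::minimizeSourceToDependencyDirectives(
      Source, Minimized, Tokens, /*Diags=*/nullptr, clang::SourceLocation());
}
```

Per the implementation below, the minimized output on success ends with a trailing newline, a `pp_eof` token is appended to the token list, and the buffer is null-terminated so it can be handed to Clang as a memory buffer without further copying.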
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp new file mode 100644 index 0000000000000..cfc37c5d3c62b --- /dev/null +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -0,0 +1,763 @@ +//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace { + +struct Minimizer { + /// Minimized output. + SmallVectorImpl<char> &Out; + /// The known tokens encountered during the minimization. + SmallVectorImpl<Token> &Tokens; + + Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens, + StringRef Input, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) + : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), + InputSourceLoc(InputSourceLoc) {} + + /// Lex the provided source and emit the minimized output. + /// + /// \returns True on error. + bool minimize(); + +private: + struct IdInfo { + const char *Last; + StringRef Name; + }; + + /// Lex an identifier. + /// + /// \pre First points at a valid identifier head. + LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); + LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, + const char *const End); + LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); + LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); + LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End); + Token &makeToken(TokenKind K) { + Tokens.emplace_back(K, Out.size()); + return Tokens.back(); + } + void popToken() { + Out.resize(Tokens.back().Offset); + Tokens.pop_back(); + } + TokenKind top() const { return Tokens.empty() ? 
pp_none : Tokens.back().K; } + + Minimizer &put(char Byte) { + Out.push_back(Byte); + return *this; + } + Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } + Minimizer &append(const char *First, const char *Last) { + Out.append(First, Last); + return *this; + } + + void printToNewline(const char *&First, const char *const End); + void printAdjacentModuleNameParts(const char *&First, const char *const End); + LLVM_NODISCARD bool printAtImportBody(const char *&First, + const char *const End); + void printDirectiveBody(const char *&First, const char *const End); + void printAdjacentMacroArgs(const char *&First, const char *const End); + LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + + /// Reports a diagnostic if the diagnostic engine is provided. Always returns + /// true at the end. + bool reportError(const char *CurPtr, unsigned Err); + + StringMap<char> SplitIds; + StringRef Input; + DiagnosticsEngine *Diags; + SourceLocation InputSourceLoc; +}; + +} // end anonymous namespace + +bool Minimizer::reportError(const char *CurPtr, unsigned Err) { + if (!Diags) + return true; + assert(CurPtr >= Input.data() && "invalid buffer ptr"); + Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { + while (First != End && isHorizontalWhitespace(*First)) + ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, + const char *Current) { + assert(First <= Current); + + // Check if we can even back up. + if (*Current != '"' || First == Current) + return false; + + // Check for an "R". + --Current; + if (*Current != 'R') + return false; + if (First == Current || !isIdentifierBody(*--Current)) + return true; + + // Check for a prefix of "u", "U", or "L". + if (*Current == 'u' || *Current == 'U' || *Current == 'L') + return First == Current || !isIdentifierBody(*--Current); + + // Check for a prefix of "u8". + if (*Current != '8' || First == Current || *Current-- != 'u') + return false; + return First == Current || !isIdentifierBody(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { + assert(First[0] == '"'); + assert(First[-1] == 'R'); + + const char *Last = ++First; + while (Last != End && *Last != '(') + ++Last; + if (Last == End) { + First = Last; // Hit the end... just give up. + return; + } + + StringRef Terminator(First, Last - First); + for (;;) { + // Move First to just past the next ")". + First = Last; + while (First != End && *First != ')') + ++First; + if (First == End) + return; + ++First; + + // Look ahead for the terminator sequence. + Last = First; + while (Last != End && size_t(Last - First) < Terminator.size() && + Terminator[Last - First] == *Last) + ++Last; + + // Check if we hit it (or the end of the file). + if (Last == End) { + First = Last; + return; + } + if (size_t(Last - First) < Terminator.size()) + continue; + if (*Last != '"') + continue; + First = Last + 1; + return; + } +} + +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '"'); + const char Terminator = *First; + for (++First; First != End && *First != Terminator; ++First) + if (*First == '\\') + if (++First == End) + return; + if (First != End) + ++First; // Finish off the string. 
+} + +static void skipNewline(const char *&First, const char *End) { + assert(isVerticalWhitespace(*First)); + ++First; + if (First == End) + return; + + // Check for "\n\r" and "\r\n". + if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) + ++First; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { + for (;;) { + if (First == End) + return; + + if (isVerticalWhitespace(*First)) + return; + + while (!isVerticalWhitespace(*First)) + if (++First == End) + return; + + if (First[-1] != '\\') + return; + + ++First; // Keep going... + } +} + +static const char *reverseOverSpaces(const char *First, const char *Last) { + assert(First <= Last); + while (First != Last && isHorizontalWhitespace(Last[-1])) + --Last; + return Last; +} + +static void skipLineComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '/'); + First += 2; + skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '*'); + if (End - First < 4) { + First = End; + return; + } + for (First += 3; First != End; ++First) + if (First[-1] == '*' && First[0] == '/') { + ++First; + return; + } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, + const char *const Cur, + const char *const End) { + assert(*Cur == '\'' && "expected quotation character"); + // skipLine called in places where we don't expect a valid number + // body before `start` on the same line, so always return false at the start. + if (Start == Cur) + return false; + // The previous character must be a valid PP number character. + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') + return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') + return false; + if (!isPreprocessingNumberBody(Prev)) + return false; + // The next character should be a valid identifier body character. + return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { + do { + assert(First <= End); + if (First == End) + return; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + return; + } + const char *Start = First; + while (First != End && !isVerticalWhitespace(*First)) { + // Iterate over strings correctly to avoid comments and newlines. + if (*First == '"' || + (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + if (isRawStringLiteral(Start, First)) + skipRawString(First, End); + else + skipString(First, End); + continue; + } + + // Iterate over comments correctly. + if (*First != '/' || End - First < 2) { + ++First; + continue; + } + + if (First[1] == '/') { + // "//...". + skipLineComment(First, End); + continue; + } + + if (First[1] != '*') { + ++First; + continue; + } + + // "/*...*/". + skipBlockComment(First, End); + } + if (First == End) + return; + + // Skip over the newline. + assert(isVerticalWhitespace(*First)); + skipNewline(First, End); + } while (First[-2] == '\\'); // Continue past line-continuations. +} + +static void skipDirective(StringRef Name, const char *&First, + const char *const End) { + if (llvm::StringSwitch<bool>(Name) + .Case("warning", true) + .Case("error", true) + .Default(false)) + // Do not process quotes or comments. 
+ skipToNewlineRaw(First, End); + else + skipLine(First, End); +} + +void Minimizer::printToNewline(const char *&First, const char *const End) { + while (First != End && !isVerticalWhitespace(*First)) { + const char *Last = First; + do { + // Iterate over strings correctly to avoid comments and newlines. + if (*Last == '"' || *Last == '\'') { + if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) + skipRawString(Last, End); + else + skipString(Last, End); + continue; + } + if (*Last != '/' || End - Last < 2) { + ++Last; + continue; // Gather the rest up to print verbatim. + } + + if (Last[1] != '/' && Last[1] != '*') { + ++Last; + continue; + } + + // Deal with "//..." and "/*...*/". + append(First, reverseOverSpaces(First, Last)); + First = Last; + + if (Last[1] == '/') { + skipLineComment(First, End); + return; + } + + put(' '); + skipBlockComment(First, End); + skipOverSpaces(First, End); + Last = First; + } while (Last != End && !isVerticalWhitespace(*Last)); + + // Print out the string. + if (Last == End || Last == First || Last[-1] != '\\') { + append(First, reverseOverSpaces(First, Last)); + return; + } + + // Print up to the backslash, backing up over spaces. + append(First, reverseOverSpaces(First, Last - 1)); + + First = Last; + skipNewline(First, End); + skipOverSpaces(First, End); + } +} + +static void skipWhitespace(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + skipOverSpaces(First, End); + + if (End - First < 2) + return; + + if (First[0] == '\\' && isVerticalWhitespace(First[1])) { + skipNewline(++First, End); + continue; + } + + // Check for a non-comment character. + if (First[0] != '/') + return; + + // "// ...". + if (First[1] == '/') { + skipLineComment(First, End); + return; + } + + // Cannot be a comment. + if (First[1] != '*') + return; + + // "/*...*/". + skipBlockComment(First, End); + } +} + +void Minimizer::printAdjacentModuleNameParts(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); + append(First, Last); + First = Last; +} + +bool Minimizer::printAtImportBody(const char *&First, const char *const End) { + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + continue; + } + + // Found a semicolon. + if (*First == ';') { + put(*First++).put('\n'); + return false; + } + + // Don't handle macro expansions inside @import for now. + if (!isIdentifierBody(*First) && *First != '.') + return true; + + printAdjacentModuleNameParts(First, End); + } +} + +void Minimizer::printDirectiveBody(const char *&First, const char *const End) { + skipWhitespace(First, End); // Skip initial whitespace. + printToNewline(First, End); + while (Out.back() == ' ') + Out.pop_back(); + put('\n'); +} + +LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, + const char *const End) { + assert(isIdentifierBody(*First) && "invalid identifer"); + const char *Last = First + 1; + while (Last != End && isIdentifierBody(*Last)) + ++Last; + return Last; +} + +LLVM_NODISCARD static const char * +getIdentifierContinuation(const char *First, const char *const End) { + if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) + return nullptr; + + ++First; + skipNewline(First, End); + if (First == End) + return nullptr; + return isIdentifierBody(First[0]) ? 
First : nullptr; +} + +Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, + const char *const End) { + const char *Last = lexRawIdentifier(First, End); + const char *Next = getIdentifierContinuation(Last, End); + if (LLVM_LIKELY(!Next)) + return IdInfo{Last, StringRef(First, Last - First)}; + + // Slow path, where identifiers are split over lines. + SmallVector<char, 64> Id(First, Last); + while (Next) { + Last = lexRawIdentifier(Next, End); + Id.append(Next, Last); + Next = getIdentifierContinuation(Last, End); + } + return IdInfo{ + Last, + SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; +} + +void Minimizer::printAdjacentMacroArgs(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && + (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); + append(First, Last); + First = Last; +} + +bool Minimizer::printMacroArgs(const char *&First, const char *const End) { + assert(*First == '('); + put(*First++); + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (*First == ')') { + put(*First++); + return false; + } + + // This is intentionally fairly liberal. + if (!(isIdentifierBody(*First) || *First == '.' || *First == ',')) + return true; + + printAdjacentMacroArgs(First, End); + } +} + +/// Looks for an identifier starting from Last. +/// +/// Updates "First" to just past the next identifier, if any. Returns true iff +/// the identifier matches "Id". +bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { + skipWhitespace(First, End); + if (First == End || !isIdentifierHead(*First)) + return false; + + IdInfo FoundId = lexIdentifier(First, End); + First = FoundId.Last; + return FoundId.Name == Id; +} + +bool Minimizer::lexAt(const char *&First, const char *const End) { + // Handle "@import". + const char *ImportLoc = First++; + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + makeToken(decl_at_import); + append("@import "); + if (printAtImportBody(First, End)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); + skipNewline(First, End); + return false; +} + +bool Minimizer::lexDefine(const char *&First, const char *const End) { + makeToken(pp_define); + append("#define "); + skipWhitespace(First, End); + + if (!isIdentifierHead(*First)) + return reportError(First, diag::err_pp_macro_not_identifier); + + IdInfo Id = lexIdentifier(First, End); + const char *Last = Id.Last; + append(Id.Name); + if (Last == End) + return false; + if (*Last == '(') { + size_t Size = Out.size(); + if (printMacroArgs(Last, End)) { + // Be robust to bad macro arguments, since they can show up in disabled + // code. + Out.resize(Size); + append("(/* invalid */\n"); + skipLine(Last, End); + return false; + } + } + skipWhitespace(Last, End); + if (Last == End) + return false; + if (!isVerticalWhitespace(*Last)) + put(' '); + printDirectiveBody(Last, End); + First = Last; + return false; +} + +bool Minimizer::lexPragma(const char *&First, const char *const End) { + // #pragma. + if (!isNextIdentifier("clang", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang. 
+ if (!isNextIdentifier("module", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module. + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + + // #pragma clang module import. + makeToken(pp_pragma_import); + append("#pragma clang module import "); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexEndif(const char *&First, const char *const End) { + // Strip out "#else" if it's empty. + if (top() == pp_else) + popToken(); + + // Strip out "#elif" if they're empty. + while (top() == pp_elif) + popToken(); + + // If "#if" is empty, strip it and skip the "#endif". + if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) { + popToken(); + skipLine(First, End); + return false; + } + + return lexDefault(pp_endif, "endif", First, End); +} + +bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End) { + makeToken(Kind); + put('#').append(Directive).put(' '); + printDirectiveBody(First, End); + return false; +} + +bool Minimizer::lexPPLine(const char *&First, const char *const End) { + assert(First != End); + + skipWhitespace(First, End); + assert(First <= End); + if (First == End) + return false; + + if (*First != '#' && *First != '@') { + skipLine(First, End); + assert(First <= End); + return false; + } + + // Handle "@import". + if (*First == '@') + return lexAt(First, End); + + // Handle preprocessing directives. + ++First; // Skip over '#'. + skipWhitespace(First, End); + + if (First == End) + return reportError(First, diag::err_pp_expected_eol); + + if (!isIdentifierHead(*First)) { + skipLine(First, End); + return false; + } + + // Figure out the token. + IdInfo Id = lexIdentifier(First, End); + First = Id.Last; + auto Kind = llvm::StringSwitch<TokenKind>(Id.Name) + .Case("include", pp_include) + .Case("__include_macros", pp___include_macros) + .Case("define", pp_define) + .Case("undef", pp_undef) + .Case("import", pp_import) + .Case("include_next", pp_include_next) + .Case("if", pp_if) + .Case("ifdef", pp_ifdef) + .Case("ifndef", pp_ifndef) + .Case("elif", pp_elif) + .Case("else", pp_else) + .Case("endif", pp_endif) + .Case("pragma", pp_pragma_import) + .Default(pp_none); + if (Kind == pp_none) { + skipDirective(Id.Name, First, End); + return false; + } + + if (Kind == pp_endif) + return lexEndif(First, End); + + if (Kind == pp_define) + return lexDefine(First, End); + + if (Kind == pp_pragma_import) + return lexPragma(First, End); + + // Everything else. + return lexDefault(Kind, Id.Name, First, End); +} + +bool Minimizer::minimizeImpl(const char *First, const char *const End) { + while (First != End) + if (lexPPLine(First, End)) + return true; + return false; +} + +bool Minimizer::minimize() { + bool Error = minimizeImpl(Input.begin(), Input.end()); + + if (!Error) { + // Add a trailing newline and an EOF on success. + if (!Out.empty() && Out.back() != '\n') + Out.push_back('\n'); + makeToken(pp_eof); + } + + // Null-terminate the output. This way the memory buffer that's passed to + // Clang will not have to worry about the terminating '\0'. 
+ Out.push_back(0); + Out.pop_back(); + return Error; +} + +bool clang::minimizeSourceToDependencyDirectives( + StringRef Input, SmallVectorImpl<char> &Output, + SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) { + Output.clear(); + Tokens.clear(); + return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); +} diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp index 23cb053c2d718..e0bf58b675056 100644 --- a/lib/Lex/HeaderMap.cpp +++ b/lib/Lex/HeaderMap.cpp @@ -1,9 +1,8 @@ //===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index c65fb47c0fe55..108630cc26f69 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -1,9 +1,8 @@ //===- HeaderSearch.cpp - Resolve Header File Locations -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -335,6 +334,7 @@ const FileEntry *DirectoryLookup::LookupFile( Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, bool &InUserSpecifiedSystemFramework, + bool &IsFrameworkFound, bool &HasBeenMapped, SmallVectorImpl<char> &MappedName) const { InUserSpecifiedSystemFramework = false; @@ -363,7 +363,7 @@ const FileEntry *DirectoryLookup::LookupFile( if (isFramework()) return DoFrameworkLookup(Filename, HS, SearchPath, RelativePath, RequestingModule, SuggestedModule, - InUserSpecifiedSystemFramework); + InUserSpecifiedSystemFramework, IsFrameworkFound); assert(isHeaderMap() && "Unknown directory lookup"); const HeaderMap *HM = getHeaderMap(); @@ -463,7 +463,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, - bool &InUserSpecifiedSystemFramework) const { + bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound) const { FileManager &FileMgr = HS.getFileMgr(); // Framework names must have a '/' in the filename. @@ -472,7 +472,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answers are yes/no and unknown. - HeaderSearch::FrameworkCacheEntry &CacheEntry = + FrameworkCacheEntry &CacheEntry = HS.LookupFrameworkCache(Filename.substr(0, SlashPos)); // If it is known and in some other directory, fail. @@ -517,8 +517,9 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( } } - // Set the 'user-specified system framework' flag. + // Set out flags. 
InUserSpecifiedSystemFramework = CacheEntry.IsUserSpecifiedSystemFramework; + IsFrameworkFound = CacheEntry.Directory; if (RelativePath) { RelativePath->clear(); @@ -538,7 +539,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); const FileEntry *FE = FileMgr.getFile(FrameworkName, - /*openFile=*/!SuggestedModule); + /*OpenFile=*/!SuggestedModule); if (!FE) { // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" const char *Private = "Private"; @@ -548,7 +549,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->insert(SearchPath->begin()+OrigSize, Private, Private+strlen(Private)); - FE = FileMgr.getFile(FrameworkName, /*openFile=*/!SuggestedModule); + FE = FileMgr.getFile(FrameworkName, /*OpenFile=*/!SuggestedModule); } // If we found the header and are allowed to suggest a module, do so now. @@ -697,10 +698,14 @@ const FileEntry *HeaderSearch::LookupFile( ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, - bool *IsMapped, bool SkipCache, bool BuildSystemModule) { + bool *IsMapped, bool *IsFrameworkFound, bool SkipCache, + bool BuildSystemModule) { if (IsMapped) *IsMapped = false; + if (IsFrameworkFound) + *IsFrameworkFound = false; + if (SuggestedModule) *SuggestedModule = ModuleMap::KnownHeader(); @@ -852,16 +857,22 @@ const FileEntry *HeaderSearch::LookupFile( for (; i != SearchDirs.size(); ++i) { bool InUserSpecifiedSystemFramework = false; bool HasBeenMapped = false; + bool IsFrameworkFoundInDir = false; const FileEntry *FE = SearchDirs[i].LookupFile( Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule, - SuggestedModule, InUserSpecifiedSystemFramework, HasBeenMapped, - MappedName); + SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir, + HasBeenMapped, MappedName); if (HasBeenMapped) { CacheLookup.MappedName = copyString(Filename, LookupFileCache.getAllocator()); if (IsMapped) *IsMapped = true; } + if (IsFrameworkFound) + // Because we keep a filename remapped for subsequent search directory + // lookups, ignore IsFrameworkFoundInDir after the first remapping and not + // just for remapping in a current search directory. 
+ *IsFrameworkFound |= (IsFrameworkFoundInDir && !CacheLookup.MappedName); if (!FE) continue; CurDir = &SearchDirs[i]; @@ -927,10 +938,10 @@ const FileEntry *HeaderSearch::LookupFile( ScratchFilename += '/'; ScratchFilename += Filename; - const FileEntry *FE = - LookupFile(ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, - CurDir, Includers.front(), SearchPath, RelativePath, - RequestingModule, SuggestedModule, IsMapped); + const FileEntry *FE = LookupFile( + ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, CurDir, + Includers.front(), SearchPath, RelativePath, RequestingModule, + SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr); if (checkMSVCHeaderSearch(Diags, MSFE, FE, IncludeLoc)) { if (SuggestedModule) @@ -1036,7 +1047,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) { + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) { // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" HeadersFilename = FrameworkName; HeadersFilename += "PrivateHeaders/"; @@ -1047,7 +1058,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) return nullptr; } @@ -1571,7 +1582,7 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { DirNative); // Search each of the ".framework" directories to load them as modules. - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { @@ -1642,7 +1653,7 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { FileMgr.makeAbsolutePath(Dir); SmallString<128> DirNative; llvm::sys::path::native(Dir, DirNative); - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { bool IsFramework = llvm::sys::path::extension(Dir->path()) == ".framework"; @@ -1654,34 +1665,30 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) { SearchDir.setSearchedAllModuleMaps(true); } -std::string HeaderSearch::suggestPathToFileForDiagnostics(const FileEntry *File, - bool *IsSystem) { +std::string HeaderSearch::suggestPathToFileForDiagnostics( + const FileEntry *File, llvm::StringRef MainFile, bool *IsSystem) { // FIXME: We assume that the path name currently cached in the FileEntry is // the most appropriate one for this analysis (and that it's spelled the // same way as the corresponding header search path). 
- return suggestPathToFileForDiagnostics(File->getName(), /*BuildDir=*/"", - IsSystem); + return suggestPathToFileForDiagnostics(File->getName(), /*WorkingDir=*/"", + MainFile, IsSystem); } std::string HeaderSearch::suggestPathToFileForDiagnostics( - llvm::StringRef File, llvm::StringRef WorkingDir, bool *IsSystem) { + llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile, + bool *IsSystem) { using namespace llvm::sys; unsigned BestPrefixLength = 0; - unsigned BestSearchDir; - - for (unsigned I = 0; I != SearchDirs.size(); ++I) { - // FIXME: Support this search within frameworks and header maps. - if (!SearchDirs[I].isNormalDir()) - continue; - - StringRef Dir = SearchDirs[I].getDir()->getName(); + // Checks whether Dir and File shares a common prefix, if they do and that's + // the longest prefix we've seen so for it returns true and updates the + // BestPrefixLength accordingly. + auto CheckDir = [&](llvm::StringRef Dir) -> bool { llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); - if (!WorkingDir.empty() && !path::is_absolute(Dir)) { + if (!WorkingDir.empty() && !path::is_absolute(Dir)) fs::make_absolute(WorkingDir, DirPath); - path::remove_dots(DirPath, /*remove_dot_dot=*/true); - Dir = DirPath; - } + path::remove_dots(DirPath, /*remove_dot_dot=*/true); + Dir = DirPath; for (auto NI = path::begin(File), NE = path::end(File), DI = path::begin(Dir), DE = path::end(Dir); /*termination condition in loop*/; ++NI, ++DI) { @@ -1700,17 +1707,37 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics( unsigned PrefixLength = NI - path::begin(File); if (PrefixLength > BestPrefixLength) { BestPrefixLength = PrefixLength; - BestSearchDir = I; + return true; } break; } + // Consider all path separators equal. + if (NI->size() == 1 && DI->size() == 1 && + path::is_separator(NI->front()) && path::is_separator(DI->front())) + continue; + if (*NI != *DI) break; } + return false; + }; + + for (unsigned I = 0; I != SearchDirs.size(); ++I) { + // FIXME: Support this search within frameworks and header maps. + if (!SearchDirs[I].isNormalDir()) + continue; + + StringRef Dir = SearchDirs[I].getDir()->getName(); + if (CheckDir(Dir) && IsSystem) + *IsSystem = BestPrefixLength ? I >= SystemDirIdx : false; } - if (IsSystem) - *IsSystem = BestPrefixLength ? BestSearchDir >= SystemDirIdx : false; - return File.drop_front(BestPrefixLength); + // Try to shorten include path using TUs directory, if we couldn't find any + // suitable prefix in include search paths. + if (!BestPrefixLength && CheckDir(path::parent_path(MainFile)) && IsSystem) + *IsSystem = false; + + + return path::convert_to_slash(File.drop_front(BestPrefixLength)); } diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index d4723091114a1..db53e6bec0440 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1,9 +1,8 @@ //===- Lexer.cpp - C Language Family Lexer --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -688,7 +687,6 @@ PreambleBounds Lexer::ComputePreamble(StringRef Buffer, // We only end up here if we didn't recognize the preprocessor // directive or it was one that can't occur in the preamble at this // point. Roll back the current token to the location of the '#'. - InPreprocessorDirective = false; TheTok = HashTok; } @@ -2073,7 +2071,7 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); + FormTokenWithChars(Result, CurPtr, tok::header_name); Result.setLiteralData(TokStart); return true; } @@ -3233,7 +3231,7 @@ LexNextToken: case '\r': if (CurPtr[0] == '\n') - Char = getAndAdvanceChar(CurPtr, Result); + (void)getAndAdvanceChar(CurPtr, Result); LLVM_FALLTHROUGH; case '\n': // If we are inside a preprocessor directive and we see the end of line, @@ -3466,7 +3464,9 @@ LexNextToken: case '"': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexStringLiteral(Result, CurPtr, tok::string_literal); + return LexStringLiteral(Result, CurPtr, + ParsingFilename ? tok::header_name + : tok::string_literal); // C99 6.4.6: Punctuators. case '?': diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index fa0815eb9c6c5..2108408377fb2 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -1,9 +1,8 @@ //===--- LiteralSupport.cpp - Code to parse and process literals ----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -617,10 +616,14 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (isHalf || isFloat || isLong || isFloat128) break; // HF, FF, LF, QF invalid. - if (s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { - s += 2; // success, eat up 2 characters. - isFloat16 = true; - continue; + // CUDA host and device may have different _Float16 support, therefore + // allows f16 literals to avoid false alarm. + // ToDo: more precise check for CUDA. + if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) && + s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { + s += 2; // success, eat up 2 characters. + isFloat16 = true; + continue; } isFloat = true; diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index dc2ba3074a8b1..5aa4679fad462 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -1,9 +1,8 @@ //===--- MacroArgs.cpp - Formal argument info for Macros ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,15 +135,12 @@ const Token *MacroArgs::getUnexpArgument(unsigned Arg) const { return Result; } -// This function assumes that the variadic arguments are the tokens -// corresponding to the last parameter (ellipsis) - and since tokens are -// separated by the 'eof' token, if that is the only token corresponding to that -// last parameter, we know no variadic arguments were supplied. -bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI) const { +bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI, + Preprocessor &PP) { if (!MI->isVariadic()) return false; const int VariadicArgIndex = getNumMacroArguments() - 1; - return getUnexpArgument(VariadicArgIndex)->isNot(tok::eof); + return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof); } /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected @@ -185,7 +181,7 @@ const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg, // list. With this installed, we lex expanded tokens until we hit the EOF // token at the end of the unexp list. PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, - false /*owns tokens*/); + false /*owns tokens*/, false /*is reinject*/); // Lex all of the macro-expanded tokens into Result. do { diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp index 434c120075964..1ccd140364aeb 100644 --- a/lib/Lex/MacroInfo.cpp +++ b/lib/Lex/MacroInfo.cpp @@ -1,9 +1,8 @@ //===- MacroInfo.cpp - Information about #defined identifiers -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp index cff950b703a6f..5e0be1a57da41 100644 --- a/lib/Lex/ModuleMap.cpp +++ b/lib/Lex/ModuleMap.cpp @@ -1,9 +1,8 @@ //===- ModuleMap.cpp - Describe the layout of modules ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -807,7 +806,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, return std::make_pair(Result, true); } -Module *ModuleMap::createGlobalModuleForInterfaceUnit(SourceLocation Loc) { +Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc) { PendingSubmodules.emplace_back( new Module("<global>", Loc, nullptr, /*IsFramework*/ false, /*IsExplicit*/ true, NumCreatedModules++)); @@ -815,6 +814,16 @@ Module *ModuleMap::createGlobalModuleForInterfaceUnit(SourceLocation Loc) { return PendingSubmodules.back().get(); } +Module * +ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent, + SourceLocation Loc) { + auto *Result = + new Module("<private>", Loc, Parent, /*IsFramework*/ false, + /*IsExplicit*/ true, NumCreatedModules++); + Result->Kind = Module::PrivateModuleFragment; + return Result; +} + Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, StringRef Name, Module *GlobalModule) { @@ -1022,7 +1031,7 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir, = StringRef(FrameworkDir->getName()); llvm::sys::path::append(SubframeworksDirName, "Frameworks"); llvm::sys::path::native(SubframeworksDirName); - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); for (llvm::vfs::directory_iterator Dir = FS.dir_begin(SubframeworksDirName, EC), DirEnd; @@ -2398,7 +2407,7 @@ void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) { std::error_code EC; SmallVector<Module::Header, 6> Headers; llvm::vfs::FileSystem &FS = - *SourceMgr.getFileManager().getVirtualFileSystem(); + SourceMgr.getFileManager().getVirtualFileSystem(); for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E; I != E && !EC; I.increment(EC)) { if (const FileEntry *FE = SourceMgr.getFileManager().getFile(I->path())) { diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index 9758557d7b448..31548d246d5a5 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -1,9 +1,8 @@ //===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,6 +23,7 @@ using namespace clang; // be called multiple times and CommitBacktrackedTokens/Backtrack calls will // be combined with the EnableBacktrackAtThisPos calls in reverse order. 
void Preprocessor::EnableBacktrackAtThisPos() { + assert(LexLevel == 0 && "cannot use lookahead while lexing"); BacktrackPositions.push_back(CachedLexPos); EnterCachingLexMode(); } @@ -35,29 +35,6 @@ void Preprocessor::CommitBacktrackedTokens() { BacktrackPositions.pop_back(); } -Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() { - assert(isBacktrackEnabled()); - auto PrevCachedLexPos = BacktrackPositions.back(); - return CachedTokensRange{PrevCachedLexPos, CachedLexPos}; -} - -void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) { - assert(TokenRange.Begin <= TokenRange.End); - if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) { - // We have backtracked to the start of the token range as we want to consume - // them again. Erase the tokens only after consuming then. - assert(!CachedTokenRangeToErase); - CachedTokenRangeToErase = TokenRange; - return; - } - // The cached tokens were committed, so they should be erased now. - assert(TokenRange.End == CachedLexPos); - CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin, - CachedTokens.begin() + TokenRange.End); - CachedLexPos = TokenRange.Begin; - ExitCachingLexMode(); -} - // Make Preprocessor re-lex the tokens that were lexed since // EnableBacktrackAtThisPos() was previously called. void Preprocessor::Backtrack() { @@ -72,15 +49,13 @@ void Preprocessor::CachingLex(Token &Result) { if (!InCachingLexMode()) return; + // The assert in EnterCachingLexMode should prevent this from happening. + assert(LexLevel == 1 && + "should not use token caching within the preprocessor"); + if (CachedLexPos < CachedTokens.size()) { Result = CachedTokens[CachedLexPos++]; - // Erase the some of the cached tokens after they are consumed when - // asked to do so. - if (CachedTokenRangeToErase && - CachedTokenRangeToErase->End == CachedLexPos) { - EraseCachedTokens(*CachedTokenRangeToErase); - CachedTokenRangeToErase = None; - } + Result.setFlag(Token::IsReinjected); return; } @@ -89,14 +64,14 @@ void Preprocessor::CachingLex(Token &Result) { if (isBacktrackEnabled()) { // Cache the lexed token. - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); CachedTokens.push_back(Result); ++CachedLexPos; return; } if (CachedLexPos < CachedTokens.size()) { - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); } else { // All cached tokens were consumed. CachedTokens.clear(); @@ -105,11 +80,23 @@ void Preprocessor::CachingLex(Token &Result) { } void Preprocessor::EnterCachingLexMode() { + // The caching layer sits on top of all the other lexers, so it's incorrect + // to cache tokens while inside a nested lex action. The cached tokens would + // be retained after returning to the enclosing lex action and, at best, + // would appear at the wrong position in the token stream. 
+ assert(LexLevel == 0 && + "entered caching lex mode while lexing something else"); + if (InCachingLexMode()) { assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); return; } + EnterCachingLexModeUnchecked(); +} + +void Preprocessor::EnterCachingLexModeUnchecked() { + assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode"); PushIncludeMacroStack(); CurLexerKind = CLK_CachingLexer; } diff --git a/lib/Lex/PPCallbacks.cpp b/lib/Lex/PPCallbacks.cpp index 952b926005b0a..cd8b04b20d245 100644 --- a/lib/Lex/PPCallbacks.cpp +++ b/lib/Lex/PPCallbacks.cpp @@ -1,9 +1,8 @@ //===--- PPCallbacks.cpp - Callbacks for Preprocessor actions ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp index 12a77849b8b30..facee28007c7d 100644 --- a/lib/Lex/PPConditionalDirectiveRecord.cpp +++ b/lib/Lex/PPConditionalDirectiveRecord.cpp @@ -1,9 +1,8 @@ //===--- PPConditionalDirectiveRecord.h - Preprocessing Directives-*- C++ -*-=// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,9 +25,8 @@ bool PPConditionalDirectiveRecord::rangeIntersectsConditionalDirective( if (Range.isInvalid()) return false; - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); + CondDirectiveLocsTy::const_iterator low = llvm::lower_bound( + CondDirectiveLocs, Range.getBegin(), CondDirectiveLoc::Comp(SourceMgr)); if (low == CondDirectiveLocs.end()) return false; @@ -56,9 +54,8 @@ SourceLocation PPConditionalDirectiveRecord::findConditionalDirectiveRegionLoc( Loc)) return CondDirectiveStack.back(); - CondDirectiveLocsTy::const_iterator - low = std::lower_bound(CondDirectiveLocs.begin(), CondDirectiveLocs.end(), - Loc, CondDirectiveLoc::Comp(SourceMgr)); + CondDirectiveLocsTy::const_iterator low = llvm::lower_bound( + CondDirectiveLocs, Loc, CondDirectiveLoc::Comp(SourceMgr)); assert(low != CondDirectiveLocs.end()); return low->getRegionLoc(); } diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index d62a3513c7770..2756042f23eb2 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -1,9 +1,8 @@ //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -79,12 +78,18 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. -void Preprocessor::DiscardUntilEndOfDirective() { +SourceRange Preprocessor::DiscardUntilEndOfDirective() { Token Tmp; - do { - LexUnexpandedToken(Tmp); + SourceRange Res; + + LexUnexpandedToken(Tmp); + Res.setBegin(Tmp.getLocation()); + while (Tmp.isNot(tok::eod)) { assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); - } while (Tmp.isNot(tok::eod)); + LexUnexpandedToken(Tmp); + } + Res.setEnd(Tmp.getLocation()); + return Res; } /// Enumerates possible cases of #define/#undef a reserved identifier. @@ -331,7 +336,10 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// /// If not, emit a diagnostic and consume up until the eod. If EnableMacros is /// true, then we consider macros that expand to zero tokens as being ok. -void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { +/// +/// Returns the location of the end of the directive. +SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, + bool EnableMacros) { Token Tmp; // Lex unexpanded tokens for most directives: macros might expand to zero // tokens, causing us to miss diagnosing invalid lines. Some directives (like @@ -346,18 +354,19 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { while (Tmp.is(tok::comment)) // Skip comments in -C mode. LexUnexpandedToken(Tmp); - if (Tmp.isNot(tok::eod)) { - // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, - // or if this is a macro-style preprocessing directive, because it is more - // trouble than it is worth to insert /**/ and check that there is no /**/ - // in the range also. - FixItHint Hint; - if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && - !CurTokenLexer) - Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); - Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; - DiscardUntilEndOfDirective(); - } + if (Tmp.is(tok::eod)) + return Tmp.getLocation(); + + // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, + // or if this is a macro-style preprocessing directive, because it is more + // trouble than it is worth to insert /**/ and check that there is no /**/ + // in the range also. + FixItHint Hint; + if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && + !CurTokenLexer) + Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; + return DiscardUntilEndOfDirective().getEnd(); } /// SkipExcludedConditionalBlock - We just read a \#if or related directive and @@ -538,19 +547,19 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) { DiscardUntilEndOfDirective(); } else { - const SourceLocation CondBegin = CurPPLexer->getSourceLocation(); // Restore the value of LexingRawMode so that identifiers are // looked up, etc, inside the #elif expression. 
assert(CurPPLexer->LexingRawMode && "We have to be skipping here!"); CurPPLexer->LexingRawMode = false; IdentifierInfo *IfNDefMacro = nullptr; - const bool CondValue = EvaluateDirectiveExpression(IfNDefMacro).Conditional; + DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); + const bool CondValue = DER.Conditional; CurPPLexer->LexingRawMode = true; if (Callbacks) { - const SourceLocation CondEnd = CurPPLexer->getSourceLocation(); - Callbacks->Elif(Tok.getLocation(), - SourceRange(CondBegin, CondEnd), - (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False), CondInfo.IfLoc); + Callbacks->Elif( + Tok.getLocation(), DER.ExprRange, + (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False), + CondInfo.IfLoc); } // If this condition is true, enter it! if (CondValue) { @@ -605,9 +614,16 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation Loc) { assert(M && "no module to include"); + // If the context is the global module fragment of some module, we never + // want to return that file; instead, we want the innermost include-guarded + // header that it included. + bool InGlobalModuleFragment = M->Kind == Module::GlobalModuleFragment; + // If we have a module import syntax, we shouldn't include a header to // make a particular module visible. - if (getLangOpts().ObjC) + if ((getLangOpts().ObjC || getLangOpts().CPlusPlusModules || + getLangOpts().ModulesTS) && + !InGlobalModuleFragment) return nullptr; Module *TopM = M->getTopLevelModule(); @@ -624,6 +640,13 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, if (!FE) break; + if (InGlobalModuleFragment) { + if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + return FE; + Loc = SM.getIncludeLoc(ID); + continue; + } + bool InTextualHeader = false; for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) { if (!Header.getModule()->isSubModuleOf(TopM)) @@ -660,7 +683,8 @@ const FileEntry *Preprocessor::LookupFile( const DirectoryLookup *FromDir, const FileEntry *FromFile, const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, - ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool SkipCache) { + ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, + bool *IsFrameworkFound, bool SkipCache) { Module *RequestingModule = getModuleForLocation(FilenameLoc); bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc); @@ -718,7 +742,8 @@ const FileEntry *Preprocessor::LookupFile( while (const FileEntry *FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, TmpFromDir, TmpCurDir, Includers, SearchPath, RelativePath, RequestingModule, - SuggestedModule, /*IsMapped=*/nullptr, SkipCache)) { + SuggestedModule, /*IsMapped=*/nullptr, + /*IsFrameworkFound=*/nullptr, SkipCache)) { // Keep looking as if this file did a #include_next. TmpFromDir = TmpCurDir; ++TmpFromDir; @@ -734,8 +759,8 @@ const FileEntry *Preprocessor::LookupFile( // Do a standard file entry lookup. 
const FileEntry *FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath, - RelativePath, RequestingModule, SuggestedModule, IsMapped, SkipCache, - BuildSystemModule); + RelativePath, RequestingModule, SuggestedModule, IsMapped, + IsFrameworkFound, SkipCache, BuildSystemModule); if (FE) { if (SuggestedModule && !LangOpts.AsmPreprocessor) HeaderInfo.getModuleMap().diagnoseHeaderInclusion( @@ -822,10 +847,10 @@ void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result, return HandleIncludeDirective(HashLoc, Result); } if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) { - Token P = LookAhead(0); - auto *II = P.getIdentifierInfo(); + Lex(Result); + auto *II = Result.getIdentifierInfo(); if (II && II->getName() == "hdrstop") - return HandlePragmaDirective(HashLoc, PIK_HashPragma); + return HandlePragmaHdrstop(Result); } } DiscardUntilEndOfDirective(); @@ -879,6 +904,8 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: Diag(Result, diag::err_embedded_directive) << II->getName(); + Diag(*ArgMacro, diag::note_macro_expansion_here) + << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); return; default: @@ -955,7 +982,7 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: - return HandlePragmaDirective(SavedHash.getLocation(), PIK_HashPragma); + return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); // GNU Extensions. case tok::pp_import: @@ -1008,7 +1035,7 @@ void Preprocessor::HandleDirective(Token &Result) { // Enter this token stream so that we re-lex the tokens. Make sure to // enable macro expansion, in case the token after the # is an identifier // that is expanded. - EnterTokenStream(std::move(Toks), 2, false); + EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false); return; } @@ -1116,19 +1143,24 @@ void Preprocessor::HandleLineDirective() { ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_line_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(StrTok, *this); assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return DiscardUntilEndOfDirective(); + if (Literal.hadError) { + DiscardUntilEndOfDirective(); + return; + } if (Literal.Pascal) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); @@ -1261,19 +1293,24 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation()); } else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } else { // Parse and validate the string, converting it into a unique ID. 
StringLiteralParser Literal(StrTok, *this); assert(Literal.isAscii() && "Didn't allow wide strings in"); - if (Literal.hadError) - return DiscardUntilEndOfDirective(); + if (Literal.hadError) { + DiscardUntilEndOfDirective(); + return; + } if (Literal.Pascal) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); @@ -1343,7 +1380,8 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { if (StrTok.hasUDSuffix()) { Diag(StrTok, diag::err_invalid_string_udl); - return DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(); + return; } // Verify that there is nothing after the string, other than EOD. @@ -1381,7 +1419,7 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) { // Note that this macro has now been exported. appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/true)); + MacroNameTok.getLocation(), /*isPublic=*/true)); } /// Handle a #private directive. @@ -1408,7 +1446,7 @@ void Preprocessor::HandleMacroPrivateDirective() { // Note that this macro has now been marked private. appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/false)); + MacroNameTok.getLocation(), /*isPublic=*/false)); } //===----------------------------------------------------------------------===// @@ -1426,6 +1464,14 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, // Get the text form of the filename. assert(!Buffer.empty() && "Can't have tokens with empty spellings!"); + // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and + // C++20 [lex.header]/2: + // + // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then + // in C: behavior is undefined + // in C++: program is conditionally-supported with implementation-defined + // semantics + // Make sure the filename is <x> or "x". bool isAngled; if (Buffer[0] == '<') { @@ -1460,67 +1506,6 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, return isAngled; } -// Handle cases where the \#include name is expanded from a macro -// as multiple tokens, which need to be glued together. -// -// This occurs for code like: -// \code -// \#define FOO <a/b.h> -// \#include FOO -// \endcode -// because in this case, "<a/b.h>" is returned as 7 tokens, not one. -// -// This code concatenates and consumes tokens up to the '>' token. It returns -// false if the > was found, otherwise it returns true if it finds and consumes -// the EOD marker. -bool Preprocessor::ConcatenateIncludeName(SmallString<128> &FilenameBuffer, - SourceLocation &End) { - Token CurTok; - - Lex(CurTok); - while (CurTok.isNot(tok::eod)) { - End = CurTok.getLocation(); - - // FIXME: Provide code completion for #includes. - if (CurTok.is(tok::code_completion)) { - setCodeCompletionReached(); - Lex(CurTok); - continue; - } - - // Append the spelling of this token to the buffer. If there was a space - // before it, add it now. - if (CurTok.hasLeadingSpace()) - FilenameBuffer.push_back(' '); - - // Get the spelling of the token, directly into FilenameBuffer if possible. - size_t PreAppendSize = FilenameBuffer.size(); - FilenameBuffer.resize(PreAppendSize+CurTok.getLength()); - - const char *BufPtr = &FilenameBuffer[PreAppendSize]; - unsigned ActualLen = getSpelling(CurTok, BufPtr); - - // If the token was spelled somewhere else, copy it into FilenameBuffer. 
- if (BufPtr != &FilenameBuffer[PreAppendSize]) - memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); - - // Resize FilenameBuffer to the correct size. - if (CurTok.getLength() != ActualLen) - FilenameBuffer.resize(PreAppendSize+ActualLen); - - // If we found the '>' marker, return success. - if (CurTok.is(tok::greater)) - return false; - - Lex(CurTok); - } - - // If we hit the eod marker, emit an error and return true so that the caller - // knows the EOD has been read. - Diag(CurTok.getLocation(), diag::err_pp_expects_filename); - return true; -} - /// Push a token onto the token stream containing an annotation. void Preprocessor::EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, @@ -1533,7 +1518,7 @@ void Preprocessor::EnterAnnotationToken(SourceRange Range, Tok[0].setLocation(Range.getBegin()); Tok[0].setAnnotationEndLoc(Range.getEnd()); Tok[0].setAnnotationValue(AnnotationVal); - EnterTokenStream(std::move(Tok), 1, true); + EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false); } /// Produce a diagnostic informing the user that a #include or similar @@ -1542,7 +1527,13 @@ static void diagnoseAutoModuleImport( Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok, ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path, SourceLocation PathEnd) { - assert(PP.getLangOpts().ObjC && "no import syntax available"); + StringRef ImportKeyword; + if (PP.getLangOpts().ObjC) + ImportKeyword = "@import"; + else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules) + ImportKeyword = "import"; + else + return; // no import syntax available SmallString<128> PathString; for (size_t I = 0, N = Path.size(); I != N; ++I) { @@ -1577,8 +1568,8 @@ static void diagnoseAutoModuleImport( /*IsTokenRange=*/false); PP.Diag(HashLoc, diag::warn_auto_module_import) << IncludeKind << PathString - << FixItHint::CreateReplacement(ReplaceRange, - ("@import " + PathString + ";").str()); + << FixItHint::CreateReplacement( + ReplaceRange, (ImportKeyword + " " + PathString + ";").str()); } // Given a vector of path components and a string containing the real @@ -1648,72 +1639,79 @@ bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts, void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Token &IncludeTok, const DirectoryLookup *LookupFrom, - const FileEntry *LookupFromFile, - bool isImport) { + const FileEntry *LookupFromFile) { Token FilenameTok; - CurPPLexer->LexIncludeFilename(FilenameTok); - - // Reserve a buffer to get the spelling. - SmallString<128> FilenameBuffer; - StringRef Filename; - SourceLocation End; - SourceLocation CharEnd; // the end of this directive, in characters + if (LexHeaderName(FilenameTok)) + return; - switch (FilenameTok.getKind()) { - case tok::eod: - // If the token kind is EOD, the error has already been diagnosed. + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + if (FilenameTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(); return; + } - case tok::angle_string_literal: - case tok::string_literal: - Filename = getSpelling(FilenameTok, FilenameBuffer); - End = FilenameTok.getLocation(); - CharEnd = End.getLocWithOffset(FilenameTok.getLength()); - break; + // Verify that there is nothing after the filename, other than EOD. Note + // that we allow macros that expand to nothing after the filename, because + // this falls into the category of "#include pp-tokens new-line" specified + // in C99 6.10.2p4. 
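[Editor's note: the diagnoseAutoModuleImport change above selects the replacement keyword from the language mode instead of assuming Objective-C. A small illustration of the fix-it it now offers; module and header names are hypothetical and the diagnostic text is paraphrased.]

  #include <Foo/Bar.h>
  // warning: treating #include as an import of module 'Foo.Bar'
  //   Objective-C fix-it:              @import Foo.Bar;
  //   Modules TS / C++ modules fix-it: import Foo.Bar;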
+ SourceLocation EndLoc = + CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true); - case tok::less: - // This could be a <foo/bar.h> file coming from a macro expansion. In this - // case, glue the tokens together into FilenameBuffer and interpret those. - FilenameBuffer.push_back('<'); - if (ConcatenateIncludeName(FilenameBuffer, End)) - return; // Found <eod> but no ">"? Diagnostic already emitted. - Filename = FilenameBuffer; - CharEnd = End.getLocWithOffset(1); + auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok, + EndLoc, LookupFrom, LookupFromFile); + switch (Action.Kind) { + case ImportAction::None: + case ImportAction::SkippedModuleImport: + break; + case ImportAction::ModuleBegin: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), + tok::annot_module_begin, Action.ModuleForHeader); + break; + case ImportAction::ModuleImport: + EnterAnnotationToken(SourceRange(HashLoc, EndLoc), + tok::annot_module_include, Action.ModuleForHeader); break; - default: - Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); - DiscardUntilEndOfDirective(); - return; } +} + +/// Handle either a #include-like directive or an import declaration that names +/// a header file. +/// +/// \param HashLoc The location of the '#' token for an include, or +/// SourceLocation() for an import declaration. +/// \param IncludeTok The include / include_next / import token. +/// \param FilenameTok The header-name token. +/// \param EndLoc The location at which any imported macros become visible. +/// \param LookupFrom For #include_next, the starting directory for the +/// directory lookup. +/// \param LookupFromFile For #include_next, the starting file for the directory +/// lookup. +Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( + SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok, + SourceLocation EndLoc, const DirectoryLookup *LookupFrom, + const FileEntry *LookupFromFile) { + SmallString<128> FilenameBuffer; + StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); + SourceLocation CharEnd = FilenameTok.getEndLoc(); CharSourceRange FilenameRange = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd); StringRef OriginalFilename = Filename; bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. - if (Filename.empty()) { - DiscardUntilEndOfDirective(); - return; - } + if (Filename.empty()) + return {ImportAction::None}; - // Verify that there is nothing after the filename, other than EOD. Note that - // we allow macros that expand to nothing after the filename, because this - // falls into the category of "#include pp-tokens new-line" specified in - // C99 6.10.2p4. - CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true); - - // Check that we don't have infinite #include recursion. - if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { - Diag(FilenameTok, diag::err_pp_include_too_deep); - HasReachedMaxIncludeDepth = true; - return; - } + bool IsImportDecl = HashLoc.isInvalid(); + SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc; // Complain about attempts to #include files in an audit pragma. 
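[Editor's note: the CheckEndOfDirective call above enforces the C99 6.10.2p4 rule described in the comment: macros that expand to nothing may follow the header-name, while any other trailing token is diagnosed. For example, with a hypothetical macro name:]

  #define EMPTY
  #include <stddef.h> EMPTY   // accepted: EMPTY contributes no tokens
  #include <stddef.h> junk    // diagnosed: extra tokens at end of #include directive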
if (PragmaARCCFCodeAuditedLoc.isValid()) { - Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited); + Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl; Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here); // Immediately leave the pragma. @@ -1722,7 +1720,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Complain about attempts to #include files in an assume-nonnull pragma. if (PragmaAssumeNonNullLoc.isValid()) { - Diag(HashLoc, diag::err_pp_include_in_assume_nonnull); + Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl; Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here); // Immediately leave the pragma. @@ -1740,6 +1738,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Search include directories. bool IsMapped = false; + bool IsFrameworkFound = false; const DirectoryLookup *CurDir; SmallString<1024> SearchPath; SmallString<1024> RelativePath; @@ -1758,7 +1757,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, FilenameLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, - &SuggestedModule, &IsMapped); + &SuggestedModule, &IsMapped, &IsFrameworkFound); if (!File) { if (Callbacks) { @@ -1775,7 +1774,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, FilenameLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, LookupFrom, LookupFromFile, CurDir, nullptr, nullptr, - &SuggestedModule, &IsMapped, /*SkipCache*/ true); + &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr, + /*SkipCache*/ true); } } } @@ -1790,12 +1790,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, false, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, - Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped); + Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, + /*IsFrameworkFound=*/nullptr); if (File) { - SourceRange Range(FilenameTok.getLocation(), CharEnd); - Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal) << - Filename << - FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\""); + Diag(FilenameTok, + diag::err_pp_file_not_found_angled_include_not_fatal) + << Filename << IsImportDecl + << FixItHint::CreateReplacement(FilenameRange, + "\"" + Filename.str() + "\""); } } @@ -1826,14 +1828,15 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, : TypoCorrectionName, isAngled, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, - Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped); + Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, + /*IsFrameworkFound=*/nullptr); if (File) { - SourceRange Range(FilenameTok.getLocation(), CharEnd); - auto Hint = isAngled - ? FixItHint::CreateReplacement( - Range, "<" + TypoCorrectionName.str() + ">") - : FixItHint::CreateReplacement( - Range, "\"" + TypoCorrectionName.str() + "\""); + auto Hint = + isAngled + ? 
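[Editor's note: the two diagnostics above now also state whether the offending directive was an import declaration (IsImportDecl). The situation they reject, sketched with the assume-nonnull pragma; diagnostic wording paraphrased:]

  #pragma clang assume_nonnull begin
  #include "other.h"           // error: includes/imports are not allowed inside an assume-nonnull region
  #pragma clang assume_nonnull end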
FixItHint::CreateReplacement( + FilenameRange, "<" + TypoCorrectionName.str() + ">") + : FixItHint::CreateReplacement( + FilenameRange, "\"" + TypoCorrectionName.str() + "\""); Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal) << OriginalFilename << TypoCorrectionName << Hint; // We found the file, so set the Filename to the name after typo @@ -1843,38 +1846,63 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } // If the file is still not found, just go with the vanilla diagnostic - if (!File) + if (!File) { Diag(FilenameTok, diag::err_pp_file_not_found) << OriginalFilename << FilenameRange; + if (IsFrameworkFound) { + size_t SlashPos = OriginalFilename.find('/'); + assert(SlashPos != StringRef::npos && + "Include with framework name should have '/' in the filename"); + StringRef FrameworkName = OriginalFilename.substr(0, SlashPos); + FrameworkCacheEntry &CacheEntry = + HeaderInfo.LookupFrameworkCache(FrameworkName); + assert(CacheEntry.Directory && "Found framework should be in cache"); + Diag(FilenameTok, diag::note_pp_framework_without_header) + << OriginalFilename.substr(SlashPos + 1) << FrameworkName + << CacheEntry.Directory->getName(); + } + } } } if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) { if (isPCHThroughHeader(File)) SkippingUntilPCHThroughHeader = false; - return; + return {ImportAction::None}; + } + + // Check for circular inclusion of the main file. + // We can't generate a consistent preamble with regard to the conditional + // stack if the main file is included again as due to the preamble bounds + // some directives (e.g. #endif of a header guard) will never be seen. + // Since this will lead to confusing errors, avoid the inclusion. + if (File && PreambleConditionalStack.isRecording() && + SourceMgr.translateFile(File) == SourceMgr.getMainFileID()) { + Diag(FilenameTok.getLocation(), + diag::err_pp_including_mainfile_in_preamble); + return {ImportAction::None}; } - // Should we enter the source file? Set to false if either the source file is + // Should we enter the source file? Set to Skip if either the source file is // known to have no effect beyond its effect on module visibility -- that is, - // if it's got an include guard that is already defined or is a modular header - // we've imported or already built. - bool ShouldEnter = true; + // if it's got an include guard that is already defined, set to Import if it + // is a modular header we've already built and should import. + enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; if (PPOpts->SingleFileParseMode) - ShouldEnter = false; + Action = IncludeLimitReached; // If we've reached the max allowed include depth, it is usually due to an // include cycle. Don't enter already processed files again as it can lead to // reaching the max allowed include depth again. - if (ShouldEnter && HasReachedMaxIncludeDepth && File && + if (Action == Enter && HasReachedMaxIncludeDepth && File && HeaderInfo.getFileInfo(File).NumIncludes) - ShouldEnter = false; + Action = IncludeLimitReached; // Determine whether we should try to import the module for this #include, if // there is one. Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). 
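[Editor's note: with the new IsFrameworkFound plumbing, a failed framework-style include now gets an extra note pointing at the framework that was found. Roughly, with hypothetical framework and header names and paraphrased diagnostic text:]

  #include <CoreThing/Missing.h>
  // error: 'CoreThing/Missing.h' file not found
  // note: did not find header 'Missing.h' in framework 'CoreThing'
  //       (loaded from /path/to/CoreThing.framework)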
- if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules && + if (Action == Enter && File && SuggestedModule && getLangOpts().Modules && !isForModuleBuilding(SuggestedModule.getModule(), getLangOpts().CurrentModule, getLangOpts().ModuleName)) { @@ -1887,7 +1915,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Diag(FilenameTok.getLocation(), diag::note_implicit_top_level_module_import_here) << SuggestedModule.getModule()->getTopLevelModuleName(); - return; + return {ImportAction::None}; } // Compute the module access path corresponding to this module. @@ -1900,9 +1928,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, std::reverse(Path.begin(), Path.end()); // Warn that we're replacing the include/import with a module import. - // We only do this in Objective-C, where we have a module-import syntax. - if (getLangOpts().ObjC) - diagnoseAutoModuleImport(*this, HashLoc, IncludeTok, Path, CharEnd); + if (!IsImportDecl) + diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd); // Load the module to import its macros. We'll make the declarations // visible when the parser gets here. @@ -1910,13 +1937,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // and making the module loader convert it back again. ModuleLoadResult Imported = TheModuleLoader.loadModule( IncludeTok.getLocation(), Path, Module::Hidden, - /*IsIncludeDirective=*/true); + /*IsInclusionDirective=*/true); assert((Imported == nullptr || Imported == SuggestedModule.getModule()) && "the imported module is different than the suggested one"); - if (Imported) - ShouldEnter = false; - else if (Imported.isMissingExpected()) { + if (Imported) { + Action = Import; + } else if (Imported.isMissingExpected()) { // We failed to find a submodule that we assumed would exist (because it // was in the directory of an umbrella header, for instance), but no // actual module containing it exists (because the umbrella header is @@ -1935,7 +1962,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); CurLexer->cutOffLexing(); } - return; + return {ImportAction::None}; } } @@ -1947,33 +1974,54 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, if (File) FileCharacter = std::max(HeaderInfo.getFileDirFlavor(File), FileCharacter); + // If this is a '#import' or an import-declaration, don't re-enter the file. + // + // FIXME: If we have a suggested module for a '#include', and we've already + // visited this file, don't bother entering it again. We know it has no + // further effect. + bool EnterOnce = + IsImportDecl || + IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import; + // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. - bool SkipHeader = false; - if (ShouldEnter && File && - !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport, + if (Action == Enter && File && + !HeaderInfo.ShouldEnterIncludeFile(*this, File, EnterOnce, getLangOpts().Modules, SuggestedModule.getModule())) { - ShouldEnter = false; - SkipHeader = true; + // Even if we've already preprocessed this header once and know that we + // don't need to see its contents again, we still need to import it if it's + // modular because we might not have imported it from this submodule before. + // + // FIXME: We don't do this when compiling a PCH because the AST + // serialization layer can't cope with it. 
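[Editor's note: the EnterOnce flag above encodes the long-standing '#import' semantics and extends them to import declarations: the named file is entered at most once even without an include guard. For example:]

  #import "config.h"   // entered and preprocessed
  #import "config.h"   // skipped: #import never re-enters a file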
This means we get local + // submodule visibility semantics wrong in that case. + Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; } - if (Callbacks) { + if (Callbacks && !IsImportDecl) { // Notify the callback object that we've seen an inclusion directive. + // FIXME: Use a different callback for a pp-import? Callbacks->InclusionDirective( HashLoc, IncludeTok, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, FilenameRange, File, SearchPath, RelativePath, - ShouldEnter ? nullptr : SuggestedModule.getModule(), FileCharacter); - if (SkipHeader && !SuggestedModule.getModule()) + Action == Import ? SuggestedModule.getModule() : nullptr, + FileCharacter); + if (Action == Skip) Callbacks->FileSkipped(*File, FilenameTok, FileCharacter); } if (!File) - return; + return {ImportAction::None}; - // FIXME: If we have a suggested module, and we've already visited this file, - // don't bother entering it again. We know it has no further effect. + // If this is a C++20 pp-import declaration, diagnose if we didn't find any + // module corresponding to the named header. + if (IsImportDecl && !SuggestedModule) { + Diag(FilenameTok, diag::err_header_import_not_header_unit) + << OriginalFilename << File->getName(); + return {ImportAction::None}; + } // Issue a diagnostic if the name of the file on disk has a different case // than the one we're about to open. @@ -2005,37 +2053,50 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // For other system headers, we don't. They can be controlled separately. auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ? diag::pp_nonportable_path : diag::pp_nonportable_system_path; - SourceRange Range(FilenameTok.getLocation(), CharEnd); Diag(FilenameTok, DiagId) << Path << - FixItHint::CreateReplacement(Range, Path); + FixItHint::CreateReplacement(FilenameRange, Path); } } - // If we don't need to enter the file, stop now. - if (!ShouldEnter) { + switch (Action) { + case Skip: + // If we don't need to enter the file, stop now. + if (Module *M = SuggestedModule.getModule()) + return {ImportAction::SkippedModuleImport, M}; + return {ImportAction::None}; + + case IncludeLimitReached: + // If we reached our include limit and don't want to enter any more files, + // don't go any further. + return {ImportAction::None}; + + case Import: { // If this is a module import, make it visible if needed. - if (auto *M = SuggestedModule.getModule()) { - // When building a pch, -fmodule-name tells the compiler to textually - // include headers in the specified module. But it is possible that - // ShouldEnter is false because we are skipping the header. In that - // case, We are not importing the specified module. - if (SkipHeader && getLangOpts().CompilingPCH && - isForModuleBuilding(M, getLangOpts().CurrentModule, - getLangOpts().ModuleName)) - return; + Module *M = SuggestedModule.getModule(); + assert(M && "no module to import"); - makeModuleVisible(M, HashLoc); + makeModuleVisible(M, EndLoc); - if (IncludeTok.getIdentifierInfo()->getPPKeywordID() != - tok::pp___include_macros) - EnterAnnotationToken(SourceRange(HashLoc, End), - tok::annot_module_include, M); - } - return; + if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == + tok::pp___include_macros) + return {ImportAction::None}; + + return {ImportAction::ModuleImport, M}; + } + + case Enter: + break; + } + + // Check that we don't have infinite #include recursion. 
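[Editor's note: the IsImportDecl check above implements the C++20 rule that an import declaration naming a header must resolve to a header unit or module. A sketch of the two outcomes; whether the first line succeeds depends on whether a header unit for <vector> is available in the build, and the diagnostic text is paraphrased:]

  import <vector>;     // OK when a header unit / module exists for the named header
  import "local.h";    // error: the header cannot be imported because it is not known to be a header unit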
+ if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { + Diag(FilenameTok, diag::err_pp_include_too_deep); + HasReachedMaxIncludeDepth = true; + return {ImportAction::None}; } // Look up the file, create a File ID for it. - SourceLocation IncludePos = End; + SourceLocation IncludePos = FilenameTok.getLocation(); // If the filename string was the result of macro expansions, set the include // position on the file where it will be included and after the expansions. if (IncludePos.isMacroID()) @@ -2045,7 +2106,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // If all is good, enter the new file! if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation())) - return; + return {ImportAction::None}; // Determine if we're switching to building a new submodule, and which one. if (auto *M = SuggestedModule.getModule()) { @@ -2056,29 +2117,37 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, << M->getFullModuleName(); Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc, diag::note_previous_definition); - return; + return {ImportAction::None}; } // When building a pch, -fmodule-name tells the compiler to textually // include headers in the specified module. We are not building the // specified module. + // + // FIXME: This is the wrong way to handle this. We should produce a PCH + // that behaves the same as the header would behave in a compilation using + // that PCH, which means we should enter the submodule. We need to teach + // the AST serialization layer to deal with the resulting AST. if (getLangOpts().CompilingPCH && isForModuleBuilding(M, getLangOpts().CurrentModule, getLangOpts().ModuleName)) - return; + return {ImportAction::None}; assert(!CurLexerSubmodule && "should not have marked this as a module yet"); CurLexerSubmodule = M; // Let the macro handling code know that any future macros are within // the new submodule. - EnterSubmodule(M, HashLoc, /*ForPragma*/false); + EnterSubmodule(M, EndLoc, /*ForPragma*/false); // Let the parser know that any future declarations are within the new // submodule. // FIXME: There's no point doing this if we're handling a #__include_macros // directive. - EnterAnnotationToken(SourceRange(HashLoc, End), tok::annot_module_begin, M); + return {ImportAction::ModuleBegin, M}; } + + assert(!IsImportDecl && "failed to diagnose missing module for import decl"); + return {ImportAction::None}; } /// HandleIncludeNextDirective - Implements \#include_next. @@ -2106,6 +2175,10 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, LookupFromFile = CurPPLexer->getFileEntry(); Lookup = nullptr; } else if (!Lookup) { + // The current file was not found by walking the include path. Either it + // is the primary file (handled above), or it was found by absolute path, + // or it was found relative to such a file. + // FIXME: Track enough information so we know which case we're in. Diag(IncludeNextTok, diag::pp_include_next_absolute_path); } else { // Start looking up in the next directory. 
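[Editor's note, for context on the HandleIncludeNextDirective comment above: #include_next continues the header search in the directories after the one where the current file was found, and the pp_include_next_absolute_path warning fires when there is no such lookup slot. Typical use in a wrapper header:]

  // stdlib.h -- a wrapper header that interposes on the real system header
  #include_next <stdlib.h>   // resume the search after the directory containing this wrapper
  // ... additional declarations ...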
@@ -2139,7 +2212,7 @@ void Preprocessor::HandleImportDirective(SourceLocation HashLoc, return HandleMicrosoftImportDirective(ImportTok); Diag(ImportTok, diag::ext_pp_import_directive); } - return HandleIncludeDirective(HashLoc, ImportTok, nullptr, nullptr, true); + return HandleIncludeDirective(HashLoc, ImportTok); } /// HandleIncludeMacrosDirective - The -imacros command line option turns into a @@ -2198,8 +2271,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { // OpenCL v1.2 s6.9.e: variadic macros are not supported. if (LangOpts.OpenCL) { - Diag(Tok, diag::err_pp_opencl_variadic_macros); - return true; + Diag(Tok, diag::ext_pp_opencl_variadic_macros); } // Lex the token after the identifier. @@ -2228,8 +2300,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) { // If this is already used as a parameter, it is used multiple times (e.g. // #define X(A,A. - if (std::find(Parameters.begin(), Parameters.end(), II) != - Parameters.end()) { // C99 6.10.3p6 + if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II; return true; } @@ -2791,10 +2862,8 @@ void Preprocessor::HandleIfDirective(Token &IfToken, // Parse and evaluate the conditional expression. IdentifierInfo *IfNDefMacro = nullptr; - const SourceLocation ConditionalBegin = CurPPLexer->getSourceLocation(); const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); const bool ConditionalTrue = DER.Conditional; - const SourceLocation ConditionalEnd = CurPPLexer->getSourceLocation(); // If this condition is equivalent to #ifndef X, and if this is the first // directive seen, handle it for the multiple-include optimization. @@ -2807,9 +2876,9 @@ void Preprocessor::HandleIfDirective(Token &IfToken, } if (Callbacks) - Callbacks->If(IfToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd), - (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); + Callbacks->If( + IfToken.getLocation(), DER.ExprRange, + (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); // Should we include the stuff contained by this directive? if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) { @@ -2902,9 +2971,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken, // #elif directive in a non-skipping conditional... start skipping. // We don't care what the condition is, because we will always skip it (since // the block immediately before it was included). 
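[Editor's note: two effects of the ReadMacroParameterList hunk above: in OpenCL, a variadic macro is now reported via ext_pp_opencl_variadic_macros (an extension diagnostic) instead of being a hard error that aborts the definition, and duplicate parameter names are still rejected. For example:]

  #define LOG(fmt, ...) printf(fmt, __VA_ARGS__)   // OpenCL: now accepted with an extension warning
  #define PAIR(A, A) (A)                           // still an error: duplicate macro parameter name 'A'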
- const SourceLocation ConditionalBegin = CurPPLexer->getSourceLocation(); - DiscardUntilEndOfDirective(); - const SourceLocation ConditionalEnd = CurPPLexer->getSourceLocation(); + SourceRange ConditionRange = DiscardUntilEndOfDirective(); PPConditionalInfo CI; if (CurPPLexer->popConditionalLevel(CI)) { @@ -2920,8 +2987,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken, if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); if (Callbacks) - Callbacks->Elif(ElifToken.getLocation(), - SourceRange(ConditionalBegin, ConditionalEnd), + Callbacks->Elif(ElifToken.getLocation(), ConditionRange, PPCallbacks::CVK_NotEvaluated, CI.IfLoc); if (PPOpts->SingleFileParseMode && !CI.FoundNonSkip) { diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp index ac01efad9bf69..e5ec2b99f5074 100644 --- a/lib/Lex/PPExpressions.cpp +++ b/lib/Lex/PPExpressions.cpp @@ -1,9 +1,8 @@ //===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -152,8 +151,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, return true; } // Consume the ). - Result.setEnd(PeekTok.getLocation()); PP.LexNonComment(PeekTok); + Result.setEnd(PeekTok.getLocation()); } else { // Consume identifier. Result.setEnd(PeekTok.getLocation()); @@ -842,14 +841,22 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { PPValue ResVal(BitWidth); DefinedTracker DT; + SourceLocation ExprStartLoc = SourceMgr.getExpansionLoc(Tok.getLocation()); if (EvaluateValue(ResVal, Tok, DT, true, *this)) { // Parse error, skip the rest of the macro line. + SourceRange ConditionRange = ExprStartLoc; if (Tok.isNot(tok::eod)) - DiscardUntilEndOfDirective(); + ConditionRange = DiscardUntilEndOfDirective(); // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds}; + + // We cannot trust the source range from the value because there was a + // parse error. Track the range manually -- the end of the directive is the + // end of the condition range. + return {false, + DT.IncludedUndefinedIds, + {ExprStartLoc, ConditionRange.getEnd()}}; } // If we are at the end of the expression after just parsing a value, there @@ -863,7 +870,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds}; + return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; } // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the @@ -876,7 +883,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. 
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds}; + return {false, DT.IncludedUndefinedIds, ResVal.getRange()}; } // If we aren't at the tok::eod token, something bad happened, like an extra @@ -888,5 +895,5 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds}; + return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; } diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index e321dd38fed6d..7cce5f9c9fe48 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -1,9 +1,8 @@ //===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -155,10 +154,11 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, /// must be freed. /// void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, - bool DisableMacroExpansion, - bool OwnsTokens) { + bool DisableMacroExpansion, bool OwnsTokens, + bool IsReinject) { if (CurLexerKind == CLK_CachingLexer) { if (CachedLexPos < CachedTokens.size()) { + assert(IsReinject && "new tokens in the middle of cached stream"); // We're entering tokens into the middle of our cached token stream. We // can't represent that, so just insert the tokens into the buffer. CachedTokens.insert(CachedTokens.begin() + CachedLexPos, @@ -171,7 +171,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, // New tokens are at the end of the cached token sequnece; insert the // token stream underneath the caching lexer. ExitCachingLexMode(); - EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens, + IsReinject); EnterCachingLexMode(); return; } @@ -180,10 +181,11 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, std::unique_ptr<TokenLexer> TokLexer; if (NumCachedTokenLexers == 0) { TokLexer = llvm::make_unique<TokenLexer>( - Toks, NumToks, DisableMacroExpansion, OwnsTokens, *this); + Toks, NumToks, DisableMacroExpansion, OwnsTokens, IsReinject, *this); } else { TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]); - TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens); + TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens, + IsReinject); } // Save our current state. 
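[Editor's note: the IsReinject parameter threaded through EnterTokenStream above distinguishes tokens that are being put back after having already been handed out from tokens the preprocessor fabricates. A summary of how the call sites in this patch use it; this is a reading of the code above, not additional API:]

  // Tokens previously returned to a consumer and now being replayed:
  //   EnterTokenStream(std::move(Toks), N, /*DisableMacroExpansion=*/true, /*IsReinject=*/true);
  // Newly created tokens (annotations, saved EOF/EOD, stringified pragmas):
  //   EnterTokenStream(std::move(Toks), N, /*DisableMacroExpansion=*/true, /*IsReinject=*/false);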
@@ -271,7 +273,7 @@ void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) { ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap(); const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry; - llvm::vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem(); + llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem(); std::error_code EC; for (llvm::vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC), End; @@ -645,6 +647,8 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc, BuildingSubmoduleStack.push_back( BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState, PendingModuleMacroNames.size())); + if (Callbacks) + Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma); return; } @@ -689,6 +693,9 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc, BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState, PendingModuleMacroNames.size())); + if (Callbacks) + Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma); + // Switch to this submodule as the current submodule. CurSubmoduleState = &State; @@ -729,6 +736,10 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { // are tracking macro visibility, don't build any, and preserve the list // of pending names for the surrounding submodule. BuildingSubmoduleStack.pop_back(); + + if (Callbacks) + Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma); + makeModuleVisible(LeavingMod, ImportLoc); return LeavingMod; } @@ -813,6 +824,9 @@ Module *Preprocessor::LeaveSubmodule(bool ForPragma) { BuildingSubmoduleStack.pop_back(); + if (Callbacks) + Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma); + // A nested #include makes the included submodule visible. makeModuleVisible(LeavingMod, ImportLoc); return LeavingMod; diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index c70ff46ec9049..687b9a9d3b7bd 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -1,9 +1,8 @@ -//===--- MacroExpansion.cpp - Top level Macro Expansion -------------------===// +//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -44,6 +43,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -364,6 +364,7 @@ void Preprocessor::RegisterBuiltinMacros() { } // Clang Extensions. + Ident__FILE_NAME__ = RegisterBuiltinMacro(*this, "__FILE_NAME__"); Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature"); Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension"); Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin"); @@ -493,10 +494,13 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Preprocessor directives used inside macro arguments are not portable, and // this enables the warning. InMacroArgs = true; + ArgMacro = &Identifier; + Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd); // Finished parsing args. 
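[Editor's note: EnterSubmodule and LeaveSubmodule above now notify PPCallbacks. A minimal sketch of a client observing submodule transitions, assuming the virtual functions mirror the call sites above; the tracer type name is hypothetical.]

  #include "clang/Basic/Module.h"
  #include "clang/Lex/PPCallbacks.h"
  #include "llvm/Support/raw_ostream.h"

  struct SubmoduleTracer : clang::PPCallbacks {
    void EnteredSubmodule(clang::Module *M, clang::SourceLocation ImportLoc,
                          bool ForPragma) override {
      llvm::errs() << "entering " << M->getFullModuleName() << "\n";
    }
    void LeftSubmodule(clang::Module *M, clang::SourceLocation ImportLoc,
                       bool ForPragma) override {
      llvm::errs() << "leaving " << M->getFullModuleName() << "\n";
    }
  };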
InMacroArgs = false; + ArgMacro = nullptr; // If there was an error parsing the arguments, bail out. if (!Args) return true; @@ -802,7 +806,7 @@ MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName, // Do not lose the EOF/EOD. auto Toks = llvm::make_unique<Token[]>(1); Toks[0] = Tok; - EnterTokenStream(std::move(Toks), 1, true); + EnterTokenStream(std::move(Toks), 1, true, /*IsReinject*/ false); break; } else if (Tok.is(tok::r_paren)) { // If we found the ) token, the macro arg list is done. @@ -1151,8 +1155,11 @@ static bool EvaluateHasIncludeCommon(Token &Tok, return false; } - // Get '('. - PP.LexNonComment(Tok); + // Get '('. If we don't have a '(', try to form a header-name token. + do { + if (PP.LexHeaderName(Tok)) + return false; + } while (Tok.getKind() == tok::comment); // Ensure we have a '('. if (Tok.isNot(tok::l_paren)) { @@ -1161,58 +1168,27 @@ static bool EvaluateHasIncludeCommon(Token &Tok, PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren; // If the next token looks like a filename or the start of one, // assume it is and process it as such. - if (!Tok.is(tok::angle_string_literal) && !Tok.is(tok::string_literal) && - !Tok.is(tok::less)) + if (Tok.isNot(tok::header_name)) return false; } else { // Save '(' location for possible missing ')' message. LParenLoc = Tok.getLocation(); - - if (PP.getCurrentLexer()) { - // Get the file name. - PP.getCurrentLexer()->LexIncludeFilename(Tok); - } else { - // We're in a macro, so we can't use LexIncludeFilename; just - // grab the next token. - PP.Lex(Tok); - } - } - - // Reserve a buffer to get the spelling. - SmallString<128> FilenameBuffer; - StringRef Filename; - SourceLocation EndLoc; - - switch (Tok.getKind()) { - case tok::eod: - // If the token kind is EOD, the error has already been diagnosed. - return false; - - case tok::angle_string_literal: - case tok::string_literal: { - bool Invalid = false; - Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); - if (Invalid) + if (PP.LexHeaderName(Tok)) return false; - break; } - case tok::less: - // This could be a <foo/bar.h> file coming from a macro expansion. In this - // case, glue the tokens together into FilenameBuffer and interpret those. - FilenameBuffer.push_back('<'); - if (PP.ConcatenateIncludeName(FilenameBuffer, EndLoc)) { - // Let the caller know a <eod> was found by changing the Token kind. - Tok.setKind(tok::eod); - return false; // Found <eod> but no ">"? Diagnostic already emitted. - } - Filename = FilenameBuffer; - break; - default: + if (Tok.isNot(tok::header_name)) { PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename); return false; } + // Reserve a buffer to get the spelling. + SmallString<128> FilenameBuffer; + bool Invalid = false; + StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid); + if (Invalid) + return false; + SourceLocation FilenameLoc = Tok.getLocation(); // Get ')'. @@ -1236,7 +1212,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, const DirectoryLookup *CurDir; const FileEntry *File = PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile, - CurDir, nullptr, nullptr, nullptr, nullptr); + CurDir, nullptr, nullptr, nullptr, nullptr, nullptr); if (PPCallbacks *Callbacks = PP.getPPCallbacks()) { SrcMgr::CharacteristicKind FileType = SrcMgr::C_User; @@ -1354,9 +1330,13 @@ already_lexed: // The last ')' has been reached; return the value if one found or // a diagnostic and a dummy value. 
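[Editor's note: EvaluateHasIncludeCommon above now asks the lexer to form a proper header-name token (LexHeaderName) instead of hand-gluing tokens, which covers both literal and macro-expanded operands of __has_include. For example, where INCFILE is a hypothetical macro:]

  #define INCFILE <sys/stat.h>
  #if __has_include(INCFILE)      // operand produced by macro expansion
  #  include INCFILE
  #endif
  #if __has_include("config.h")   // literal operand
  #  include "config.h"
  #endif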
- if (Result.hasValue()) + if (Result.hasValue()) { OS << Result.getValue(); - else { + // For strict conformance to __has_cpp_attribute rules, use 'L' + // suffix for dated literals. + if (Result.getValue() > 1) + OS << 'L'; + } else { OS << 0; if (!SuppressDiagnostic) PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc); @@ -1478,6 +1458,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Set up the return result. Tok.setIdentifierInfo(nullptr); Tok.clearFlag(Token::NeedsCleaning); + bool IsAtStartOfLine = Tok.isAtStartOfLine(); + bool HasLeadingSpace = Tok.hasLeadingSpace(); if (II == Ident__LINE__) { // C99 6.10.8: "__LINE__: The presumed line number (within the current @@ -1500,7 +1482,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // __LINE__ expands to a simple numeric value. OS << (PLoc.isValid()? PLoc.getLine() : 1); Tok.setKind(tok::numeric_constant); - } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__ || + II == Ident__FILE_NAME__) { // C99 6.10.8: "__FILE__: The presumed name of the current source file (a // character string literal)". This can be affected by #line. PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); @@ -1521,7 +1504,19 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // Escape this filename. Turn '\' -> '\\' '"' -> '\"' SmallString<128> FN; if (PLoc.isValid()) { - FN += PLoc.getFilename(); + // __FILE_NAME__ is a Clang-specific extension that expands to the + // the last part of __FILE__. + if (II == Ident__FILE_NAME__) { + // Try to get the last path component, failing that return the original + // presumed location. + StringRef PLFileName = llvm::sys::path::filename(PLoc.getFilename()); + if (PLFileName != "") + FN += PLFileName; + else + FN += PLoc.getFilename(); + } else { + FN += PLoc.getFilename(); + } Lexer::Stringify(FN); OS << '"' << FN << '"'; } @@ -1631,6 +1626,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { .Case("__is_target_vendor", true) .Case("__is_target_os", true) .Case("__is_target_environment", true) + .Case("__builtin_LINE", true) + .Case("__builtin_FILE", true) + .Case("__builtin_FUNCTION", true) + .Case("__builtin_COLUMN", true) + .Case("__builtin_bit_cast", true) .Default(false); } }); @@ -1707,7 +1707,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { HasLexedNextToken = Tok.is(tok::string_literal); if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return false; // FIXME: Should we accept "-R..." flags here, or should that be @@ -1814,6 +1814,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { llvm_unreachable("Unknown identifier!"); } CreateString(OS.str(), Tok, Tok.getLocation(), Tok.getLocation()); + Tok.setFlagValue(Token::StartOfLine, IsAtStartOfLine); + Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); } void Preprocessor::markMacroAsUsed(MacroInfo *MI) { diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index 575935119f6f1..4e4db668551f8 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -1,9 +1,8 @@ //===- Pragma.cpp - Pragma registration and handling ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
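[Editor's note: two user-visible pieces in the hunk above: __has_cpp_attribute now prints its dated value with an 'L' suffix, and the new __FILE_NAME__ builtin expands to just the last component of the presumed file name. Small illustrations; the path and the NODISCARD macro name are hypothetical:]

  #if defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201603L
  #  define NODISCARD [[nodiscard]]
  #else
  #  define NODISCARD
  #endif

  // Compiled as /path/to/widget.cpp:
  const char *full = __FILE__;       // "/path/to/widget.cpp" (as given to the compiler, or as set by #line)
  const char *base = __FILE_NAME__;  // "widget.cpp" -- Clang extension, last path component only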
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -64,7 +63,7 @@ PragmaHandler::~PragmaHandler() = default; EmptyPragmaHandler::EmptyPragmaHandler(StringRef Name) : PragmaHandler(Name) {} void EmptyPragmaHandler::HandlePragma(Preprocessor &PP, - PragmaIntroducerKind Introducer, + PragmaIntroducer Introducer, Token &FirstToken) {} //===----------------------------------------------------------------------===// @@ -99,8 +98,7 @@ void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) { } void PragmaNamespace::HandlePragma(Preprocessor &PP, - PragmaIntroducerKind Introducer, - Token &Tok) { + PragmaIntroducer Introducer, Token &Tok) { // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro // expand it, the user can have a STDC #define, that should not affect this. PP.LexUnexpandedToken(Tok); @@ -125,10 +123,9 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, /// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the /// rest of the pragma, passing it to the registered pragma handlers. -void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc, - PragmaIntroducerKind Introducer) { +void Preprocessor::HandlePragmaDirective(PragmaIntroducer Introducer) { if (Callbacks) - Callbacks->PragmaDirective(IntroducerLoc, Introducer); + Callbacks->PragmaDirective(Introducer.Loc, Introducer.Kind); if (!PragmasEnabled) return; @@ -145,84 +142,73 @@ void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc, DiscardUntilEndOfDirective(); } -namespace { - -/// Helper class for \see Preprocessor::Handle_Pragma. -class LexingFor_PragmaRAII { - Preprocessor &PP; - bool InMacroArgPreExpansion; - bool Failed = false; - Token &OutTok; - Token PragmaTok; - -public: - LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion, - Token &Tok) - : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) { - if (InMacroArgPreExpansion) { - PragmaTok = OutTok; - PP.EnableBacktrackAtThisPos(); - } - } - - ~LexingFor_PragmaRAII() { - if (InMacroArgPreExpansion) { - // When committing/backtracking the cached pragma tokens in a macro - // argument pre-expansion we want to ensure that either the tokens which - // have been committed will be removed from the cache or that the tokens - // over which we just backtracked won't remain in the cache after they're - // consumed and that the caching will stop after consuming them. - // Otherwise the caching will interfere with the way macro expansion - // works, because we will continue to cache tokens after consuming the - // backtracked tokens, which shouldn't happen when we're dealing with - // macro argument pre-expansion. - auto CachedTokenRange = PP.LastCachedTokenRange(); - if (Failed) { - PP.CommitBacktrackedTokens(); - } else { - PP.Backtrack(); - OutTok = PragmaTok; - } - PP.EraseCachedTokens(CachedTokenRange); - } - } - - void failed() { - Failed = true; - } -}; - -} // namespace - /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then /// return the first token after the directive. The _Pragma token has just /// been read into 'Tok'. void Preprocessor::Handle_Pragma(Token &Tok) { - // This works differently if we are pre-expanding a macro argument. 
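[Editor's note: handlers now receive a PragmaIntroducer value carrying both Kind and Loc instead of a bare PragmaIntroducerKind, as the EmptyPragmaHandler and PragmaNamespace overrides above show. A minimal sketch of a custom handler against the new signature; the handler and pragma names are hypothetical.]

  #include "clang/Lex/Pragma.h"
  #include "clang/Lex/Preprocessor.h"

  struct MyPragmaHandler : clang::PragmaHandler {
    MyPragmaHandler() : PragmaHandler("my_pragma") {}
    void HandlePragma(clang::Preprocessor &PP, clang::PragmaIntroducer Introducer,
                      clang::Token &FirstToken) override {
      // Introducer.Kind says whether this came from #pragma, _Pragma, or __pragma;
      // Introducer.Loc is the location of the introducing token.
    }
  };
  // Registered with something like PP.AddPragmaHandler(new MyPragmaHandler());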
- // In that case we don't actually "activate" the pragma now, we only lex it - // until we are sure it is lexically correct and then we backtrack so that - // we activate the pragma whenever we encounter the tokens again in the token - // stream. This ensures that we will activate it in the correct location - // or that we will ignore it if it never enters the token stream, e.g: + // C11 6.10.3.4/3: + // all pragma unary operator expressions within [a completely + // macro-replaced preprocessing token sequence] are [...] processed [after + // rescanning is complete] + // + // This means that we execute _Pragma operators in two cases: + // + // 1) on token sequences that would otherwise be produced as the output of + // phase 4 of preprocessing, and + // 2) on token sequences formed as the macro-replaced token sequence of a + // macro argument // - // #define EMPTY(x) - // #define INACTIVE(x) EMPTY(x) - // INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\"")) + // Case #2 appears to be a wording bug: only _Pragmas that would survive to + // the end of phase 4 should actually be executed. Discussion on the WG14 + // mailing list suggests that a _Pragma operator is notionally checked early, + // but only pragmas that survive to the end of phase 4 should be executed. + // + // In Case #2, we check the syntax now, but then put the tokens back into the + // token stream for later consumption. + + struct TokenCollector { + Preprocessor &Self; + bool Collect; + SmallVector<Token, 3> Tokens; + Token &Tok; + + void lex() { + if (Collect) + Tokens.push_back(Tok); + Self.Lex(Tok); + } - LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok); + void revert() { + assert(Collect && "did not collect tokens"); + assert(!Tokens.empty() && "collected unexpected number of tokens"); + + // Push the ( "string" ) tokens into the token stream. + auto Toks = llvm::make_unique<Token[]>(Tokens.size()); + std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); + Toks[Tokens.size() - 1] = Tok; + Self.EnterTokenStream(std::move(Toks), Tokens.size(), + /*DisableMacroExpansion*/ true, + /*IsReinject*/ true); + + // ... and return the _Pragma token unchanged. + Tok = *Tokens.begin(); + } + }; + + TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok}; // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); // Read the '('. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::l_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } // Read the '"..."'. - Lex(Tok); + Toks.lex(); if (!tok::isStringLiteral(Tok.getKind())) { Diag(PragmaLoc, diag::err__Pragma_malformed); // Skip bad tokens, and the ')', if present. @@ -234,7 +220,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } if (Tok.hasUDSuffix()) { @@ -243,21 +229,24 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } // Remember the string. Token StrTok = Tok; // Read the ')'. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::r_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } - if (InMacroArgPreExpansion) + // If we're expanding a macro argument, put the tokens back. 
+ if (InMacroArgPreExpansion) { + Toks.revert(); return; + } SourceLocation RParenLoc = Tok.getLocation(); std::string StrVal = getSpelling(StrTok); @@ -330,7 +319,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { EnterSourceFileWithLexer(TL, nullptr); // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective(PragmaLoc, PIK__Pragma); + HandlePragmaDirective({PIK__Pragma, PragmaLoc}); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -376,10 +365,11 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { std::copy(PragmaToks.begin(), PragmaToks.end(), TokArray); // Push the tokens onto the stack. - EnterTokenStream(TokArray, PragmaToks.size(), true, true); + EnterTokenStream(TokArray, PragmaToks.size(), true, true, + /*IsReinject*/ false); // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective(PragmaLoc, PIK___pragma); + HandlePragmaDirective({PIK___pragma, PragmaLoc}); // Finally, return whatever came after the pragma directive. return Lex(Tok); @@ -483,11 +473,14 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { Token FilenameTok; - CurPPLexer->LexIncludeFilename(FilenameTok); + if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false)) + return; - // If the token kind is EOD, the error has already been diagnosed. - if (FilenameTok.is(tok::eod)) + // If the next token wasn't a header-name, diagnose the error. + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); return; + } // Reserve a buffer to get the spelling. SmallString<128> FilenameBuffer; @@ -507,7 +500,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { const DirectoryLookup *CurDir; const FileEntry *File = LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr, - nullptr, CurDir, nullptr, nullptr, nullptr, nullptr); + nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr); if (!File) { if (!SuppressIncludeNotFoundError) Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; @@ -663,24 +656,13 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) { // We expect either a quoted string literal, or a bracketed name Token SourceFilenameTok; - CurPPLexer->LexIncludeFilename(SourceFilenameTok); - if (SourceFilenameTok.is(tok::eod)) { - // The diagnostic has already been handled + if (LexHeaderName(SourceFilenameTok)) return; - } StringRef SourceFileName; SmallString<128> FileNameBuffer; - if (SourceFilenameTok.is(tok::string_literal) || - SourceFilenameTok.is(tok::angle_string_literal)) { + if (SourceFilenameTok.is(tok::header_name)) { SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer); - } else if (SourceFilenameTok.is(tok::less)) { - // This could be a path instead of just a name - FileNameBuffer.push_back('<'); - SourceLocation End; - if (ConcatenateIncludeName(FileNameBuffer, End)) - return; // Diagnostic already emitted - SourceFileName = FileNameBuffer; } else { Diag(Tok, diag::warn_pragma_include_alias_expected_filename); return; @@ -695,23 +677,12 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) { } Token ReplaceFilenameTok; - CurPPLexer->LexIncludeFilename(ReplaceFilenameTok); - if (ReplaceFilenameTok.is(tok::eod)) { - // The diagnostic has already been handled + if (LexHeaderName(ReplaceFilenameTok)) return; - } StringRef ReplaceFileName; - if 
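[Editor's note: the TokenCollector above replaces the backtracking RAII helper: during macro-argument pre-expansion the _Pragma is only syntax-checked and its tokens are pushed back (IsReinject=true), so it is executed only if it survives to the end of phase 4. The example from the removed helper's comment still describes the interesting case:]

  #define EMPTY(x)
  #define INACTIVE(x) EMPTY(x)
  // The _Pragma is macro-replaced as an argument, but its tokens never reach
  // the end of translation phase 4, so the pragma is never executed.
  INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))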
(ReplaceFilenameTok.is(tok::string_literal) || - ReplaceFilenameTok.is(tok::angle_string_literal)) { + if (ReplaceFilenameTok.is(tok::header_name)) { ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer); - } else if (ReplaceFilenameTok.is(tok::less)) { - // This could be a path instead of just a name - FileNameBuffer.push_back('<'); - SourceLocation End; - if (ConcatenateIncludeName(FileNameBuffer, End)) - return; // Diagnostic already emitted - ReplaceFileName = FileNameBuffer; } else { Diag(Tok, diag::warn_pragma_include_alias_expected_filename); return; @@ -986,7 +957,7 @@ namespace { struct PragmaOnceHandler : public PragmaHandler { PragmaOnceHandler() : PragmaHandler("once") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &OnceTok) override { PP.CheckEndOfDirective("pragma once"); PP.HandlePragmaOnce(OnceTok); @@ -998,7 +969,7 @@ struct PragmaOnceHandler : public PragmaHandler { struct PragmaMarkHandler : public PragmaHandler { PragmaMarkHandler() : PragmaHandler("mark") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &MarkTok) override { PP.HandlePragmaMark(); } @@ -1008,7 +979,7 @@ struct PragmaMarkHandler : public PragmaHandler { struct PragmaPoisonHandler : public PragmaHandler { PragmaPoisonHandler() : PragmaHandler("poison") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PoisonTok) override { PP.HandlePragmaPoison(); } @@ -1019,7 +990,7 @@ struct PragmaPoisonHandler : public PragmaHandler { struct PragmaSystemHeaderHandler : public PragmaHandler { PragmaSystemHeaderHandler() : PragmaHandler("system_header") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &SHToken) override { PP.HandlePragmaSystemHeader(SHToken); PP.CheckEndOfDirective("pragma"); @@ -1029,7 +1000,7 @@ struct PragmaSystemHeaderHandler : public PragmaHandler { struct PragmaDependencyHandler : public PragmaHandler { PragmaDependencyHandler() : PragmaHandler("dependency") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DepToken) override { PP.HandlePragmaDependency(DepToken); } @@ -1038,8 +1009,8 @@ struct PragmaDependencyHandler : public PragmaHandler { struct PragmaDebugHandler : public PragmaHandler { PragmaDebugHandler() : PragmaHandler("__debug") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, - Token &DepToken) override { + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &DebugToken) override { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::identifier)) { @@ -1057,7 +1028,7 @@ struct PragmaDebugHandler : public PragmaHandler { Crasher.startToken(); Crasher.setKind(tok::annot_pragma_parser_crash); Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); - PP.EnterToken(Crasher); + PP.EnterToken(Crasher, /*IsReinject*/false); } else if (II->isStr("dump")) { Token Identifier; PP.LexUnexpandedToken(Identifier); @@ -1069,7 +1040,7 @@ struct PragmaDebugHandler : public PragmaHandler { SourceRange(Tok.getLocation(), Identifier.getLocation())); DumpAnnot.setAnnotationValue(DumpII); PP.DiscardUntilEndOfDirective(); - PP.EnterToken(DumpAnnot); + 
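[Editor's note: HandlePragmaIncludeAlias above now lexes both operands as header-name tokens. The MS-compatible pragma it implements, for reference; header names hypothetical:]

  #pragma include_alias(<oldname.h>, <newname.h>)
  #include <oldname.h>   // searched as if it were #include <newname.h>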
PP.EnterToken(DumpAnnot, /*IsReinject*/false); } else { PP.Diag(Identifier, diag::warn_pragma_debug_missing_argument) << II->getName(); @@ -1101,6 +1072,22 @@ struct PragmaDebugHandler : public PragmaHandler { else PP.Diag(MacroName, diag::warn_pragma_debug_missing_argument) << II->getName(); + } else if (II->isStr("module_map")) { + llvm::SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 8> + ModuleName; + if (LexModuleName(PP, Tok, ModuleName)) + return; + ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap(); + Module *M = nullptr; + for (auto IIAndLoc : ModuleName) { + M = MM.lookupModuleQualified(IIAndLoc.first->getName(), M); + if (!M) { + PP.Diag(IIAndLoc.second, diag::warn_pragma_debug_unknown_module) + << IIAndLoc.first; + return; + } + } + M->dump(); } else if (II->isStr("overflow_stack")) { DebugOverflowStack(); } else if (II->isStr("handle_crash")) { @@ -1136,7 +1123,8 @@ struct PragmaDebugHandler : public PragmaHandler { Toks[0].setKind(tok::annot_pragma_captured); Toks[0].setLocation(NameLoc); - PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true); + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, + /*IsReinject=*/false); } // Disable MSVC warning about runtime stack overflow. @@ -1161,7 +1149,7 @@ public: explicit PragmaDiagnosticHandler(const char *NS) : PragmaHandler("diagnostic"), Namespace(NS) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DiagToken) override { SourceLocation DiagLoc = DiagToken.getLocation(); Token Tok; @@ -1203,7 +1191,7 @@ public: std::string WarningName; if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return; if (Tok.isNot(tok::eod)) { @@ -1240,7 +1228,7 @@ public: /// "\#pragma hdrstop [<header-name-string>]" struct PragmaHdrstopHandler : public PragmaHandler { PragmaHdrstopHandler() : PragmaHandler("hdrstop") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &DepToken) override { PP.HandlePragmaHdrstop(DepToken); } @@ -1252,7 +1240,7 @@ struct PragmaHdrstopHandler : public PragmaHandler { struct PragmaWarningHandler : public PragmaHandler { PragmaWarningHandler() : PragmaHandler("warning") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { // Parse things like: // warning(push, 1) @@ -1369,11 +1357,75 @@ struct PragmaWarningHandler : public PragmaHandler { } }; +/// "\#pragma execution_character_set(...)". MSVC supports this pragma only +/// for "UTF-8". We parse it and ignore it if UTF-8 is provided and warn +/// otherwise to avoid -Wunknown-pragma warnings. 
+struct PragmaExecCharsetHandler : public PragmaHandler { + PragmaExecCharsetHandler() : PragmaHandler("execution_character_set") {} + + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &Tok) override { + // Parse things like: + // execution_character_set(push, "UTF-8") + // execution_character_set(pop) + SourceLocation DiagLoc = Tok.getLocation(); + PPCallbacks *Callbacks = PP.getPPCallbacks(); + + PP.Lex(Tok); + if (Tok.isNot(tok::l_paren)) { + PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << "("; + return; + } + + PP.Lex(Tok); + IdentifierInfo *II = Tok.getIdentifierInfo(); + + if (II && II->isStr("push")) { + // #pragma execution_character_set( push[ , string ] ) + PP.Lex(Tok); + if (Tok.is(tok::comma)) { + PP.Lex(Tok); + + std::string ExecCharset; + if (!PP.FinishLexStringLiteral(Tok, ExecCharset, + "pragma execution_character_set", + /*AllowMacroExpansion=*/false)) + return; + + // MSVC supports either of these, but nothing else. + if (ExecCharset != "UTF-8" && ExecCharset != "utf-8") { + PP.Diag(Tok, diag::warn_pragma_exec_charset_push_invalid) << ExecCharset; + return; + } + } + if (Callbacks) + Callbacks->PragmaExecCharsetPush(DiagLoc, "UTF-8"); + } else if (II && II->isStr("pop")) { + // #pragma execution_character_set( pop ) + PP.Lex(Tok); + if (Callbacks) + Callbacks->PragmaExecCharsetPop(DiagLoc); + } else { + PP.Diag(Tok, diag::warn_pragma_exec_charset_spec_invalid); + return; + } + + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok, diag::warn_pragma_exec_charset_expected) << ")"; + return; + } + + PP.Lex(Tok); + if (Tok.isNot(tok::eod)) + PP.Diag(Tok, diag::ext_pp_extra_tokens_at_eol) << "pragma execution_character_set"; + } +}; + /// PragmaIncludeAliasHandler - "\#pragma include_alias("...")". struct PragmaIncludeAliasHandler : public PragmaHandler { PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &IncludeAliasTok) override { PP.HandlePragmaIncludeAlias(IncludeAliasTok); } @@ -1416,7 +1468,7 @@ public: : PragmaHandler(PragmaKind(Kind, true)), Kind(Kind), Namespace(Namespace) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation MessageLoc = Tok.getLocation(); PP.Lex(Tok); @@ -1438,7 +1490,7 @@ public: std::string MessageString; if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind), - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; if (ExpectClosingParen) { @@ -1472,7 +1524,7 @@ public: struct PragmaModuleImportHandler : public PragmaHandler { PragmaModuleImportHandler() : PragmaHandler("import") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation ImportLoc = Tok.getLocation(); @@ -1488,7 +1540,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { // If we have a non-empty module path, load the named module. 
Module *Imported = PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (!Imported) return; @@ -1509,7 +1561,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { struct PragmaModuleBeginHandler : public PragmaHandler { PragmaModuleBeginHandler() : PragmaHandler("begin") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation BeginLoc = Tok.getLocation(); @@ -1533,16 +1585,15 @@ struct PragmaModuleBeginHandler : public PragmaHandler { // Find the module we're entering. We require that a module map for it // be loaded or implicitly loadable. - // FIXME: We could create the submodule here. We'd need to know whether - // it's supposed to be explicit, but not much else. - Module *M = PP.getHeaderSearchInfo().lookupModule(Current); + auto &HSI = PP.getHeaderSearchInfo(); + Module *M = HSI.lookupModule(Current); if (!M) { PP.Diag(ModuleName.front().second, diag::err_pp_module_begin_no_module_map) << Current; return; } for (unsigned I = 1; I != ModuleName.size(); ++I) { - auto *NewM = M->findSubmodule(ModuleName[I].first->getName()); + auto *NewM = M->findOrInferSubmodule(ModuleName[I].first->getName()); if (!NewM) { PP.Diag(ModuleName[I].second, diag::err_pp_module_begin_no_submodule) << M->getFullModuleName() << ModuleName[I].first; @@ -1570,7 +1621,7 @@ struct PragmaModuleBeginHandler : public PragmaHandler { struct PragmaModuleEndHandler : public PragmaHandler { PragmaModuleEndHandler() : PragmaHandler("end") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation Loc = Tok.getLocation(); @@ -1590,7 +1641,7 @@ struct PragmaModuleEndHandler : public PragmaHandler { struct PragmaModuleBuildHandler : public PragmaHandler { PragmaModuleBuildHandler() : PragmaHandler("build") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { PP.HandlePragmaModuleBuild(Tok); } @@ -1600,7 +1651,7 @@ struct PragmaModuleBuildHandler : public PragmaHandler { struct PragmaModuleLoadHandler : public PragmaHandler { PragmaModuleLoadHandler() : PragmaHandler("load") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &Tok) override { SourceLocation Loc = Tok.getLocation(); @@ -1615,7 +1666,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { // Load the module, don't make it visible. 
PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); } }; @@ -1624,7 +1675,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { struct PragmaPushMacroHandler : public PragmaHandler { PragmaPushMacroHandler() : PragmaHandler("push_macro") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PushMacroTok) override { PP.HandlePragmaPushMacro(PushMacroTok); } @@ -1635,7 +1686,7 @@ struct PragmaPushMacroHandler : public PragmaHandler { struct PragmaPopMacroHandler : public PragmaHandler { PragmaPopMacroHandler() : PragmaHandler("pop_macro") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &PopMacroTok) override { PP.HandlePragmaPopMacro(PopMacroTok); } @@ -1646,7 +1697,7 @@ struct PragmaPopMacroHandler : public PragmaHandler { struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { SourceLocation Loc = NameTok.getLocation(); bool IsBegin; @@ -1701,7 +1752,7 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler { struct PragmaAssumeNonNullHandler : public PragmaHandler { PragmaAssumeNonNullHandler() : PragmaHandler("assume_nonnull") {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { SourceLocation Loc = NameTok.getLocation(); bool IsBegin; @@ -1770,7 +1821,7 @@ struct PragmaAssumeNonNullHandler : public PragmaHandler { struct PragmaRegionHandler : public PragmaHandler { PragmaRegionHandler(const char *pragma) : PragmaHandler(pragma) {} - void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, Token &NameTok) override { // #pragma region: endregion matches can be verified // __pragma(region): no sense, but ignored by msvc @@ -1824,6 +1875,7 @@ void Preprocessor::RegisterBuiltinPragmas() { // MS extensions. if (LangOpts.MicrosoftExt) { AddPragmaHandler(new PragmaWarningHandler()); + AddPragmaHandler(new PragmaExecCharsetHandler()); AddPragmaHandler(new PragmaIncludeAliasHandler()); AddPragmaHandler(new PragmaHdrstopHandler()); } diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp index b37a8cf1ced49..115256db48095 100644 --- a/lib/Lex/PreprocessingRecord.cpp +++ b/lib/Lex/PreprocessingRecord.cpp @@ -1,9 +1,8 @@ //===- PreprocessingRecord.cpp - Record of Preprocessing ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -239,16 +238,13 @@ unsigned PreprocessingRecord::findBeginLocalPreprocessedEntity( return First - PreprocessedEntities.begin(); } -unsigned PreprocessingRecord::findEndLocalPreprocessedEntity( - SourceLocation Loc) const { +unsigned +PreprocessingRecord::findEndLocalPreprocessedEntity(SourceLocation Loc) const { if (SourceMgr.isLoadedSourceLocation(Loc)) return 0; - std::vector<PreprocessedEntity *>::const_iterator - I = std::upper_bound(PreprocessedEntities.begin(), - PreprocessedEntities.end(), - Loc, - PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + auto I = llvm::upper_bound(PreprocessedEntities, Loc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); return I - PreprocessedEntities.begin(); } @@ -306,10 +302,9 @@ PreprocessingRecord::addPreprocessedEntity(PreprocessedEntity *Entity) { } // Linear search unsuccessful. Do a binary search. - pp_iter I = std::upper_bound(PreprocessedEntities.begin(), - PreprocessedEntities.end(), - BeginLoc, - PPEntityComp<&SourceRange::getBegin>(SourceMgr)); + pp_iter I = + llvm::upper_bound(PreprocessedEntities, BeginLoc, + PPEntityComp<&SourceRange::getBegin>(SourceMgr)); pp_iter insertI = PreprocessedEntities.insert(I, Entity); return getPPEntityID(insertI - PreprocessedEntities.begin(), /*isLoaded=*/false); diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 047a4caaca73f..bdc5fbcd2beab 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -1,9 +1,8 @@ -//===- Preprocess.cpp - C Language Family Preprocessor Implementation -----===// +//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -78,12 +77,12 @@ ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, DiagnosticsEngine &diags, LangOptions &opts, - SourceManager &SM, MemoryBufferCache &PCMCache, - HeaderSearch &Headers, ModuleLoader &TheModuleLoader, + SourceManager &SM, HeaderSearch &Headers, + ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup, bool OwnsHeaders, TranslationUnitKind TUKind) : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), - FileMgr(Headers.getFileMgr()), SourceMgr(SM), PCMCache(PCMCache), + FileMgr(Headers.getFileMgr()), SourceMgr(SM), ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), // As the language options may have not been loaded yet (when @@ -103,6 +102,7 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, DisableMacroExpansion = false; MacroExpansionInDirectivesOverride = false; InMacroArgs = false; + ArgMacro = nullptr; InMacroArgPreExpansion = false; NumCachedTokenLexers = 0; PragmasEnabled = true; @@ -567,7 +567,8 @@ void Preprocessor::EnterMainSourceFile() { SourceLocation(), PPOpts->PCHThroughHeader, /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, - /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr); + /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr, + /*IsFrameworkFound=*/nullptr); if (!File) { Diag(SourceLocation(), diag::err_pp_through_header_not_found) << PPOpts->PCHThroughHeader; @@ -624,8 +625,22 @@ void Preprocessor::SkipTokensWhileUsingPCH() { bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; Token Tok; while (true) { - bool InPredefines = (CurLexer->getFileID() == getPredefinesFileID()); - CurLexer->Lex(Tok); + bool InPredefines = + (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); + switch (CurLexerKind) { + case CLK_Lexer: + CurLexer->Lex(Tok); + break; + case CLK_TokenLexer: + CurTokenLexer->Lex(Tok); + break; + case CLK_CachingLexer: + CachingLex(Tok); + break; + case CLK_LexAfterModuleImport: + LexAfterModuleImport(Tok); + break; + } if (Tok.is(tok::eof) && !InPredefines) { ReachedMainFileEOF = true; break; @@ -861,6 +876,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { } void Preprocessor::Lex(Token &Result) { + ++LexLevel; + // We loop here until a lex function returns a token; this avoids recursion. bool ReturnedToken; do { @@ -876,8 +893,7 @@ void Preprocessor::Lex(Token &Result) { ReturnedToken = true; break; case CLK_LexAfterModuleImport: - LexAfterModuleImport(Result); - ReturnedToken = true; + ReturnedToken = LexAfterModuleImport(Result); break; } } while (!ReturnedToken); @@ -891,17 +907,296 @@ void Preprocessor::Lex(Token &Result) { Result.setIdentifierInfo(nullptr); } + // Update ImportSeqState to track our position within a C++20 import-seq + // if this token is being produced as a result of phase 4 of translation. 
+ if (getLangOpts().CPlusPlusModules && LexLevel == 1 && + !Result.getFlag(Token::IsReinjected)) { + switch (Result.getKind()) { + case tok::l_paren: case tok::l_square: case tok::l_brace: + ImportSeqState.handleOpenBracket(); + break; + case tok::r_paren: case tok::r_square: + ImportSeqState.handleCloseBracket(); + break; + case tok::r_brace: + ImportSeqState.handleCloseBrace(); + break; + case tok::semi: + ImportSeqState.handleSemi(); + break; + case tok::header_name: + case tok::annot_header_unit: + ImportSeqState.handleHeaderName(); + break; + case tok::kw_export: + ImportSeqState.handleExport(); + break; + case tok::identifier: + if (Result.getIdentifierInfo()->isModulesImport()) { + ImportSeqState.handleImport(); + if (ImportSeqState.afterImportSeq()) { + ModuleImportLoc = Result.getLocation(); + ModuleImportPath.clear(); + ModuleImportExpectsIdentifier = true; + CurLexerKind = CLK_LexAfterModuleImport; + } + break; + } + LLVM_FALLTHROUGH; + default: + ImportSeqState.handleMisc(); + break; + } + } + LastTokenWasAt = Result.is(tok::at); + --LexLevel; + if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) + OnToken(Result); +} + +/// Lex a header-name token (including one formed from header-name-tokens if +/// \p AllowConcatenation is \c true). +/// +/// \param FilenameTok Filled in with the next token. On success, this will +/// be either a header_name token. On failure, it will be whatever other +/// token was found instead. +/// \param AllowMacroExpansion If \c true, allow the header name to be formed +/// by macro expansion (concatenating tokens as necessary if the first +/// token is a '<'). +/// \return \c true if we reached EOD or EOF while looking for a > token in +/// a concatenated header name and diagnosed it. \c false otherwise. +bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { + // Lex using header-name tokenization rules if tokens are being lexed from + // a file. Just grab a token normally if we're in a macro expansion. + if (CurPPLexer) + CurPPLexer->LexIncludeFilename(FilenameTok); + else + Lex(FilenameTok); + + // This could be a <foo/bar.h> file coming from a macro expansion. In this + // case, glue the tokens together into an angle_string_literal token. + SmallString<128> FilenameBuffer; + if (FilenameTok.is(tok::less) && AllowMacroExpansion) { + bool StartOfLine = FilenameTok.isAtStartOfLine(); + bool LeadingSpace = FilenameTok.hasLeadingSpace(); + bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro(); + + SourceLocation Start = FilenameTok.getLocation(); + SourceLocation End; + FilenameBuffer.push_back('<'); + + // Consume tokens until we find a '>'. + // FIXME: A header-name could be formed starting or ending with an + // alternative token. It's not clear whether that's ill-formed in all + // cases. + while (FilenameTok.isNot(tok::greater)) { + Lex(FilenameTok); + if (FilenameTok.isOneOf(tok::eod, tok::eof)) { + Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater; + Diag(Start, diag::note_matching) << tok::less; + return true; + } + + End = FilenameTok.getLocation(); + + // FIXME: Provide code completion for #includes. + if (FilenameTok.is(tok::code_completion)) { + setCodeCompletionReached(); + Lex(FilenameTok); + continue; + } + + // Append the spelling of this token to the buffer. If there was a space + // before it, add it now. + if (FilenameTok.hasLeadingSpace()) + FilenameBuffer.push_back(' '); + + // Get the spelling of the token, directly into FilenameBuffer if + // possible. 
+ size_t PreAppendSize = FilenameBuffer.size(); + FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength()); + + const char *BufPtr = &FilenameBuffer[PreAppendSize]; + unsigned ActualLen = getSpelling(FilenameTok, BufPtr); + + // If the token was spelled somewhere else, copy it into FilenameBuffer. + if (BufPtr != &FilenameBuffer[PreAppendSize]) + memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); + + // Resize FilenameBuffer to the correct size. + if (FilenameTok.getLength() != ActualLen) + FilenameBuffer.resize(PreAppendSize + ActualLen); + } + + FilenameTok.startToken(); + FilenameTok.setKind(tok::header_name); + FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine); + FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace); + FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro); + CreateString(FilenameBuffer, FilenameTok, Start, End); + } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) { + // Convert a string-literal token of the form " h-char-sequence " + // (produced by macro expansion) into a header-name token. + // + // The rules for header-names don't quite match the rules for + // string-literals, but all the places where they differ result in + // undefined behavior, so we can and do treat them the same. + // + // A string-literal with a prefix or suffix is not translated into a + // header-name. This could theoretically be observable via the C++20 + // context-sensitive header-name formation rules. + StringRef Str = getSpelling(FilenameTok, FilenameBuffer); + if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"') + FilenameTok.setKind(tok::header_name); + } + + return false; +} + +/// Collect the tokens of a C++20 pp-import-suffix. +void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { + // FIXME: For error recovery, consider recognizing attribute syntax here + // and terminating / diagnosing a missing semicolon if we find anything + // else? (Can we leave that to the parser?) + unsigned BracketDepth = 0; + while (true) { + Toks.emplace_back(); + Lex(Toks.back()); + + switch (Toks.back().getKind()) { + case tok::l_paren: case tok::l_square: case tok::l_brace: + ++BracketDepth; + break; + + case tok::r_paren: case tok::r_square: case tok::r_brace: + if (BracketDepth == 0) + return; + --BracketDepth; + break; + + case tok::semi: + if (BracketDepth == 0) + return; + break; + + case tok::eof: + return; + + default: + break; + } + } } + /// Lex a token following the 'import' contextual keyword. /// -void Preprocessor::LexAfterModuleImport(Token &Result) { +/// pp-import: [C++20] +/// import header-name pp-import-suffix[opt] ; +/// import header-name-tokens pp-import-suffix[opt] ; +/// [ObjC] @ import module-name ; +/// [Clang] import module-name ; +/// +/// header-name-tokens: +/// string-literal +/// < [any sequence of preprocessing-tokens other than >] > +/// +/// module-name: +/// module-name-qualifier[opt] identifier +/// +/// module-name-qualifier +/// module-name-qualifier[opt] identifier . +/// +/// We respond to a pp-import by importing macros from the named module. +bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. recomputeCurLexerKind(); - // Lex the next token. - Lex(Result); + // Lex the next token. The header-name lexing rules are used at the start of + // a pp-import. + // + // For now, we only support header-name imports in C++20 mode. 
+ // FIXME: Should we allow this in all language modes that support an import + // declaration as an extension? + if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { + if (LexHeaderName(Result)) + return true; + } else { + Lex(Result); + } + + // Allocate a holding buffer for a sequence of tokens and introduce it into + // the token stream. + auto EnterTokens = [this](ArrayRef<Token> Toks) { + auto ToksCopy = llvm::make_unique<Token[]>(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ true, /*IsReinject*/ false); + }; + + // Check for a header-name. + SmallVector<Token, 32> Suffix; + if (Result.is(tok::header_name)) { + // Enter the header-name token into the token stream; a Lex action cannot + // both return a token and cache tokens (doing so would corrupt the token + // cache if the call to Lex comes from CachingLex / PeekAhead). + Suffix.push_back(Result); + + // Consume the pp-import-suffix and expand any macros in it now. We'll add + // it back into the token stream later. + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not a pp-import after all. + EnterTokens(Suffix); + return false; + } + + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. + SourceLocation SemiLoc = Suffix.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + // Reconstitute the import token. + Token ImportTok; + ImportTok.startToken(); + ImportTok.setKind(tok::kw_import); + ImportTok.setLocation(ModuleImportLoc); + ImportTok.setIdentifierInfo(getIdentifierInfo("import")); + ImportTok.setLength(6); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + Suffix.emplace_back(); + Suffix.back().startToken(); + Suffix.back().setKind(tok::annot_module_begin); + Suffix.back().setLocation(SemiLoc); + Suffix.back().setAnnotationEndLoc(SemiLoc); + Suffix.back().setAnnotationValue(Action.ModuleForHeader); + LLVM_FALLTHROUGH; + + case ImportAction::ModuleImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + Suffix[0].setKind(tok::annot_header_unit); + Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); + Suffix[0].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? + break; + } + + EnterTokens(Suffix); + return false; + } // The token sequence // @@ -916,7 +1211,7 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { Result.getLocation())); ModuleImportExpectsIdentifier = false; CurLexerKind = CLK_LexAfterModuleImport; - return; + return true; } // If we're expecting a '.' or a ';', and we got a '.', then wait until we @@ -925,40 +1220,61 @@ void Preprocessor::LexAfterModuleImport(Token &Result) { if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; - return; + return true; } - // If we have a non-empty module path, load the named module. 
- if (!ModuleImportPath.empty()) { - // Under the Modules TS, the dot is just part of the module name, and not - // a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? - std::string FlatModuleName; - if (getLangOpts().ModulesTS) { - for (auto &Piece : ModuleImportPath) { - if (!FlatModuleName.empty()) - FlatModuleName += "."; - FlatModuleName += Piece.first->getName(); - } - SourceLocation FirstPathLoc = ModuleImportPath[0].second; - ModuleImportPath.clear(); - ModuleImportPath.push_back( - std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); + // If we didn't recognize a module name at all, this is not a (valid) import. + if (ModuleImportPath.empty() || Result.is(tok::eof)) + return true; + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation SemiLoc = Result.getLocation(); + if (Result.isNot(tok::semi)) { + Suffix.push_back(Result); + CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + // This is not an import after all. + EnterTokens(Suffix); + return false; } + SemiLoc = Suffix.back().getLocation(); + } - Module *Imported = nullptr; - if (getLangOpts().Modules) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - ModuleImportPath, - Module::Hidden, - /*IsIncludeDirective=*/false); - if (Imported) - makeModuleVisible(Imported, ModuleImportLoc); + // Under the Modules TS, the dot is just part of the module name, and not + // a real hierarchy separator. Flatten such module names now. + // + // FIXME: Is this the right level to be performing this transformation? + std::string FlatModuleName; + if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { + for (auto &Piece : ModuleImportPath) { + if (!FlatModuleName.empty()) + FlatModuleName += "."; + FlatModuleName += Piece.first->getName(); } - if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport)) - Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); + SourceLocation FirstPathLoc = ModuleImportPath[0].second; + ModuleImportPath.clear(); + ModuleImportPath.push_back( + std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); + } + + Module *Imported = nullptr; + if (getLangOpts().Modules) { + Imported = TheModuleLoader.loadModule(ModuleImportLoc, + ModuleImportPath, + Module::Hidden, + /*IsInclusionDirective=*/false); + if (Imported) + makeModuleVisible(Imported, SemiLoc); } + if (Callbacks) + Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); + + if (!Suffix.empty()) { + EnterTokens(Suffix); + return false; + } + return true; } void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { @@ -1039,14 +1355,14 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { void Preprocessor::addCommentHandler(CommentHandler *Handler) { assert(Handler && "NULL comment handler"); - assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == - CommentHandlers.end() && "Comment handler already registered"); + assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() && + "Comment handler already registered"); CommentHandlers.push_back(Handler); } void Preprocessor::removeCommentHandler(CommentHandler *Handler) { std::vector<CommentHandler *>::iterator Pos = - std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); + llvm::find(CommentHandlers, Handler); assert(Pos != CommentHandlers.end() && "Comment handler not 
registered"); CommentHandlers.erase(Pos); } diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index 9f930c3a3c6a0..5f6f4a13419be 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -1,9 +1,8 @@ //===- PreprocessorLexer.cpp - C Language Family Lexer --------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,9 +30,7 @@ PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) /// After the preprocessor has parsed a \#include, lex and /// (potentially) macro expand the filename. void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { - assert(ParsingPreprocessorDirective && - ParsingFilename == false && - "Must be in a preprocessing directive!"); + assert(ParsingFilename == false && "reentered LexIncludeFilename"); // We are now parsing a filename! ParsingFilename = true; @@ -46,10 +43,6 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { // We should have obtained the filename now. ParsingFilename = false; - - // No filename? - if (FilenameTok.is(tok::eod)) - PP->Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); } /// getFileEntry - Return the FileEntry corresponding to this FileID. Like diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index dc03e16daa8b6..19ab93ec54b47 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -1,9 +1,8 @@ //===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index f810c28ccdf1d..e626cfcc927f5 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -1,9 +1,8 @@ //===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -161,6 +160,11 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) { bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { + // Conservatively assume that every annotation token that has a printable + // form requires whitespace. + if (PrevTok.isAnnotation()) + return true; + // First, check to see if the tokens were directly adjacent in the original // source. 
If they were, it must be okay to stick them together: if there // were an issue, the tokens would have been lexed differently. diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 608e0dedebb7e..a7957e82e4955 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -1,9 +1,8 @@ //===- TokenLexer.cpp - Lex from a token stream ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -54,6 +53,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, Tokens = &*Macro->tokens_begin(); OwnsTokens = false; DisableMacroExpansion = false; + IsReinject = false; NumTokens = Macro->tokens_end()-Macro->tokens_begin(); MacroExpansionStart = SourceLocation(); @@ -92,7 +92,9 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, /// Create a TokenLexer for the specified token stream. This does not /// take ownership of the specified token vector. void TokenLexer::Init(const Token *TokArray, unsigned NumToks, - bool disableMacroExpansion, bool ownsTokens) { + bool disableMacroExpansion, bool ownsTokens, + bool isReinject) { + assert(!isReinject || disableMacroExpansion); // If the client is reusing a TokenLexer, make sure to free any memory // associated with it. destroy(); @@ -102,6 +104,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks, Tokens = TokArray; OwnsTokens = ownsTokens; DisableMacroExpansion = disableMacroExpansion; + IsReinject = isReinject; NumTokens = NumToks; CurTokenIdx = 0; ExpandLocStart = ExpandLocEnd = SourceLocation(); @@ -244,8 +247,7 @@ void TokenLexer::ExpandFunctionArguments() { // we install the newly expanded sequence as the new 'Tokens' list. bool MadeChange = false; - const bool CalledWithVariadicArguments = - ActualArgs->invokedWithVariadicArgument(Macro); + Optional<bool> CalledWithVariadicArguments; VAOptExpansionContext VCtx(PP); @@ -292,7 +294,12 @@ void TokenLexer::ExpandFunctionArguments() { // this token. Note sawClosingParen() returns true only if the r_paren matches // the closing r_paren of the __VA_OPT__. if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) { - if (!CalledWithVariadicArguments) { + // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__. + if (!CalledWithVariadicArguments.hasValue()) { + CalledWithVariadicArguments = + ActualArgs->invokedWithVariadicArgument(Macro, PP); + } + if (!*CalledWithVariadicArguments) { // Skip this token. continue; } @@ -315,8 +322,8 @@ void TokenLexer::ExpandFunctionArguments() { stringifyVAOPTContents(ResultToks, VCtx, /*ClosingParenLoc*/ Tokens[I].getLocation()); - } else if (/*No tokens within VAOPT*/ !( - ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) { + } else if (/*No tokens within VAOPT*/ + ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) { // Treat VAOPT as a placemarker token. 
Eat either the '##' before the // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that // hashhash was not a placemarker) or the '##' @@ -327,6 +334,26 @@ void TokenLexer::ExpandFunctionArguments() { } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) { ++I; // Skip the following hashhash. } + } else { + // If there's a ## before the __VA_OPT__, we might have discovered + // that the __VA_OPT__ begins with a placeholder. We delay action on + // that to now to avoid messing up our stashed count of tokens before + // __VA_OPT__. + if (VCtx.beginsWithPlaceholder()) { + assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 && + ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() && + ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is( + tok::hashhash) && + "no token paste before __VA_OPT__"); + ResultToks.erase(ResultToks.begin() + + VCtx.getNumberOfTokensPriorToVAOpt() - 1); + } + // If the expansion of __VA_OPT__ ends with a placeholder, eat any + // following '##' token. + if (VCtx.endsWithPlaceholder() && I + 1 != E && + Tokens[I + 1].is(tok::hashhash)) { + ++I; + } } VCtx.reset(); // We processed __VA_OPT__'s closing paren (and the exit out of @@ -387,6 +414,7 @@ void TokenLexer::ExpandFunctionArguments() { !ResultToks.empty() && ResultToks.back().is(tok::hashhash); bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash); bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash); + bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren); assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) && "unexpected ## in ResultToks"); @@ -471,6 +499,18 @@ void TokenLexer::ExpandFunctionArguments() { NextTokGetsSpace); ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false); NextTokGetsSpace = false; + } else { + // We're creating a placeholder token. Usually this doesn't matter, + // but it can affect paste behavior when at the start or end of a + // __VA_OPT__. + if (NonEmptyPasteBefore) { + // We're imagining a placeholder token is inserted here. If this is + // the first token in a __VA_OPT__ after a ##, delete the ##. + assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__"); + VCtx.hasPlaceholderAfterHashhashAtStart(); + } + if (RParenAfter) + VCtx.hasPlaceholderBeforeRParen(); } continue; } @@ -535,6 +575,9 @@ void TokenLexer::ExpandFunctionArguments() { continue; } + if (RParenAfter) + VCtx.hasPlaceholderBeforeRParen(); + // If this is on the RHS of a paste operator, we've already copied the // paste operator to the ResultToks list, unless the LHS was empty too. // Remove it. @@ -548,6 +591,8 @@ void TokenLexer::ExpandFunctionArguments() { if (!VCtx.isInVAOpt() || ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt()) ResultToks.pop_back(); + else + VCtx.hasPlaceholderAfterHashhashAtStart(); } // If this is the __VA_ARGS__ token, and if the argument wasn't provided, @@ -606,6 +651,8 @@ bool TokenLexer::Lex(Token &Tok) { // Get the next token to return. Tok = Tokens[CurTokenIdx++]; + if (IsReinject) + Tok.setFlag(Token::IsReinjected); bool TokenIsFromPaste = false; diff --git a/lib/Lex/UnicodeCharSets.h b/lib/Lex/UnicodeCharSets.h index 116d553d20404..d56bc8ef6721e 100644 --- a/lib/Lex/UnicodeCharSets.h +++ b/lib/Lex/UnicodeCharSets.h @@ -1,9 +1,8 @@ //===--- UnicodeCharSets.h - Contains important sets of characters --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_LIB_LEX_UNICODECHARSETS_H
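A note on the PragmaHandler interface change that appears throughout the Pragma.cpp hunks above: handlers now take a PragmaIntroducer value rather than a bare PragmaIntroducerKind. The brace-initialized calls such as HandlePragmaDirective({PIK__Pragma, PragmaLoc}) indicate that the struct bundles the introducer kind with its source location. Below is a minimal sketch of how an out-of-tree handler would be updated for the new signature; the handler name and pragma spelling are hypothetical and not part of this patch.

#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"

namespace {
// Hypothetical out-of-tree handler, updated from the old
// HandlePragma(Preprocessor &, PragmaIntroducerKind, Token &) signature.
struct PragmaExampleHandler : public clang::PragmaHandler {
  PragmaExampleHandler() : clang::PragmaHandler("example") {}

  void HandlePragma(clang::Preprocessor &PP,
                    clang::PragmaIntroducer Introducer,
                    clang::Token &FirstTok) override {
    // Introducer.Kind identifies the spelling that introduced the pragma
    // (#pragma, _Pragma, or Microsoft __pragma); Introducer.Loc is its location.
    if (Introducer.Kind != clang::PIK_HashPragma)
      return;                        // only react to the '#pragma' spelling here
    PP.DiscardUntilEndOfDirective(); // consume and ignore the rest of the line
  }
};
} // namespace

Registration is unchanged from the old interface: PP.AddPragmaHandler(new PragmaExampleHandler());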

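For reference, the pp-import lexing added to Preprocessor.cpp above (LexHeaderName, CollectPpImportSuffix, and the reworked LexAfterModuleImport) corresponds to C++20 source of roughly the following shape. This is illustrative only; the header and module names are hypothetical, and the header-name forms are only recognized in a C++20 modules-enabled compilation.

// Illustrative C++20 translation unit exercising the new pp-import handling
// (the header and module names below are hypothetical).
import <vector>;      // header-name form, lexed with header-name rules
import "config.h";    // string-literal form, re-tagged as a header_name token
import lib.util;      // named-module form; the dotted path is flattened into
                      // "lib.util" before ModuleLoader::loadModule is called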