src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-02-11 12:38:04 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2023-02-11 12:38:11 +0000
commit	e3b557809604d036af6e00c60f012c2025b59a5e (patch)
tree	8a11ba2269a3b669601e2fd41145b174008f4da8 /clang/lib/Lex
parent	08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff)

vendor/llvm-project/llvmorg-16-init-18548-gb0daacf58f41

Diffstat (limited to 'clang/lib/Lex')

-rw-r--r--

clang/lib/Lex/DependencyDirectivesScanner.cpp

-rw-r--r--

clang/lib/Lex/HeaderMap.cpp

-rw-r--r--

clang/lib/Lex/HeaderSearch.cpp

220

-rw-r--r--

clang/lib/Lex/InitHeaderSearch.cpp

146

-rw-r--r--

clang/lib/Lex/Lexer.cpp

283

-rw-r--r--

clang/lib/Lex/LiteralSupport.cpp

-rw-r--r--

clang/lib/Lex/MacroArgs.cpp

-rw-r--r--

clang/lib/Lex/MacroInfo.cpp

-rw-r--r--

clang/lib/Lex/ModuleMap.cpp

192

-rw-r--r--

clang/lib/Lex/PPCallbacks.cpp

-rw-r--r--

clang/lib/Lex/PPDirectives.cpp

119

-rw-r--r--

clang/lib/Lex/PPExpressions.cpp

-rw-r--r--

clang/lib/Lex/PPLexerChange.cpp

-rw-r--r--

clang/lib/Lex/PPMacroExpansion.cpp

144

-rw-r--r--

clang/lib/Lex/Pragma.cpp

-rw-r--r--

clang/lib/Lex/PreprocessingRecord.cpp

-rw-r--r--

clang/lib/Lex/Preprocessor.cpp

-rw-r--r--

clang/lib/Lex/TokenConcatenation.cpp

-rw-r--r--

clang/lib/Lex/TokenLexer.cpp

114

-rw-r--r--

clang/lib/Lex/UnicodeCharSets.h

305

20 files changed, 1084 insertions, 778 deletions

diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 567ca81f6ac2..0adbaa36bf7c 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp

@@ -23,6 +23,7 @@

#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/StringMap.h"

#include "llvm/ADT/StringSwitch.h"

+#include <optional>

using namespace clang;

using namespace clang::dependency_directives_scan;

@@ -81,46 +82,49 @@ struct Scanner {

private:

/// Lexes next token and advances \p First and the \p Lexer.

- LLVM_NODISCARD dependency_directives_scan::Token &

+ [[nodiscard]] dependency_directives_scan::Token &

lexToken(const char *&First, const char *const End);

dependency_directives_scan::Token &lexIncludeFilename(const char *&First,

const char *const End);

+ void skipLine(const char *&First, const char *const End);

+ void skipDirective(StringRef Name, const char *&First, const char *const End);

/// Lexes next token and if it is identifier returns its string, otherwise

- /// it skips the current line and returns \p None.

+ /// it skips the current line and returns \p std::nullopt.

///

/// In any case (whatever the token kind) \p First and the \p Lexer will

/// advance beyond the token.

- LLVM_NODISCARD Optional<StringRef>

+ [[nodiscard]] std::optional<StringRef>

tryLexIdentifierOrSkipLine(const char *&First, const char *const End);

/// Used when it is certain that next token is an identifier.

- LLVM_NODISCARD StringRef lexIdentifier(const char *&First,

- const char *const End);

+ [[nodiscard]] StringRef lexIdentifier(const char *&First,

+ const char *const End);

/// Lexes next token and returns true iff it is an identifier that matches \p

/// Id, otherwise it skips the current line and returns false.

///

/// In any case (whatever the token kind) \p First and the \p Lexer will

/// advance beyond the token.

- LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id,

- const char *&First,

- const char *const End);

- LLVM_NODISCARD bool scanImpl(const char *First, const char *const End);

- LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);

- LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);

- LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);

- LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First,

+ [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id,

+ const char *&First,

+ const char *const End);

+ [[nodiscard]] bool scanImpl(const char *First, const char *const End);

+ [[nodiscard]] bool lexPPLine(const char *&First, const char *const End);

+ [[nodiscard]] bool lexAt(const char *&First, const char *const End);

+ [[nodiscard]] bool lexModule(const char *&First, const char *const End);

+ [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First,

+ const char *const End);

+ [[nodiscard]] bool lexPragma(const char *&First, const char *const End);

+ [[nodiscard]] bool lexEndif(const char *&First, const char *const End);

+ [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,

const char *const End);

- LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);

- LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);

- LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First,

- const char *const End);

- LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind,

- const char *&First,

- const char *const End);

+ [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind,

+ const char *&First,

+ const char *const End);

void lexPPDirectiveBody(const char *&First, const char *const End);

DirectiveWithTokens &pushDirective(DirectiveKind Kind) {

@@ -150,6 +154,7 @@ private:

DiagnosticsEngine *Diags;

SourceLocation InputSourceLoc;

+ const char *LastTokenPtr = nullptr;

/// Keeps track of the tokens for the currently lexed directive. Once a

/// directive is fully lexed and "committed" then the tokens get appended to

/// \p Tokens and \p CurDirToks is cleared for the next directive.

@@ -177,8 +182,8 @@ static void skipOverSpaces(const char *&First, const char *const End) {

++First;

}

-LLVM_NODISCARD static bool isRawStringLiteral(const char *First,

- const char *Current) {

+[[nodiscard]] static bool isRawStringLiteral(const char *First,

+ const char *Current) {

assert(First <= Current);

// Check if we can even back up.

@@ -364,7 +369,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start,

return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));

}

-static void skipLine(const char *&First, const char *const End) {

+void Scanner::skipLine(const char *&First, const char *const End) {

for (;;) {

assert(First <= End);

if (First == End)

@@ -379,6 +384,7 @@ static void skipLine(const char *&First, const char *const End) {

// Iterate over strings correctly to avoid comments and newlines.

if (*First == '"' ||

(*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {

+ LastTokenPtr = First;

if (isRawStringLiteral(Start, First))

skipRawString(First, End);

else

@@ -388,6 +394,7 @@ static void skipLine(const char *&First, const char *const End) {

// Iterate over comments correctly.

if (*First != '/' || End - First < 2) {

+ LastTokenPtr = First;

++First;

continue;

}

@@ -399,6 +406,7 @@ static void skipLine(const char *&First, const char *const End) {

}

if (First[1] != '*') {

+ LastTokenPtr = First;

++First;

continue;

}

@@ -416,8 +424,8 @@ static void skipLine(const char *&First, const char *const End) {

}

-static void skipDirective(StringRef Name, const char *&First,

- const char *const End) {

+void Scanner::skipDirective(StringRef Name, const char *&First,

+ const char *const End) {

if (llvm::StringSwitch<bool>(Name)

.Case("warning", true)

.Case("error", true)

@@ -517,13 +525,13 @@ void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {

}

-LLVM_NODISCARD Optional<StringRef>

+[[nodiscard]] std::optional<StringRef>

Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {

const dependency_directives_scan::Token &Tok = lexToken(First, End);

if (Tok.isNot(tok::raw_identifier)) {

if (!Tok.is(tok::eod))

skipLine(First, End);

- return None;

+ return std::nullopt;

}

bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;

@@ -548,14 +556,15 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {

}

StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {

- Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);

+ std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);

assert(Id && "expected identifier token");

- return Id.value();

+ return *Id;

}

bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,

const char *const End) {

- if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) {

+ if (std::optional<StringRef> FoundId =

+ tryLexIdentifierOrSkipLine(First, End)) {

if (*FoundId == Id)

return true;

skipLine(First, End);

@@ -581,7 +590,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) {

bool Export = false;

if (Id == "export") {

Export = true;

- Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);

+ std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);

if (!NextId)

return false;

Id = *NextId;

@@ -621,7 +630,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) {

}

bool Scanner::lexPragma(const char *&First, const char *const End) {

- Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);

+ std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);

if (!FoundId)

return false;

@@ -710,6 +719,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {

return false;

}

+ LastTokenPtr = First;

TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);

auto ScEx1 = make_scope_exit([&]() {

@@ -733,10 +744,18 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {

// Lex '#'.

const dependency_directives_scan::Token &HashTok = lexToken(First, End);

+ if (HashTok.is(tok::hashhash)) {

+ // A \p tok::hashhash at this location is passed by the preprocessor to the

+ // parser to interpret, like any other token. So for dependency scanning

+ // skip it like a normal token not affecting the preprocessor.

+ skipLine(First, End);

+ assert(First <= End);

+ return false;

+ }

assert(HashTok.is(tok::hash));

(void)HashTok;

- Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);

+ std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);

if (!FoundId)

return false;

@@ -803,6 +822,9 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {

if (!Error) {

// Add an EOF on success.

+ if (LastTokenPtr &&

+ (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))

+ pushDirective(tokens_present_before_eof);

pushDirective(pp_eof);

}

@@ -851,7 +873,9 @@ void clang::printDependencyDirectivesAsSource(

};

for (const dependency_directives_scan::Directive &Directive : Directives) {

- Optional<tok::TokenKind> PrevTokenKind;

+ if (Directive.Kind == tokens_present_before_eof)

+ OS << "<TokBeforeEOF>";

+ std::optional<tok::TokenKind> PrevTokenKind;

for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {

if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))

OS << ' ';

diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp
index 0001fc348eda..bb50a4eef65c 100644
--- a/clang/lib/Lex/HeaderMap.cpp
+++ b/clang/lib/Lex/HeaderMap.cpp

@@ -23,6 +23,7 @@

#include "llvm/Support/Debug.h"

#include <cstring>

#include <memory>

+#include <optional>

using namespace clang;

/// HashHMapKey - This is the 'well known' hash function required by the file

@@ -145,13 +146,13 @@ HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {

return Result;

}

-Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {

+std::optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {

// Add the start of the string table to the idx.

StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);

// Check for invalid index.

if (StrTabIdx >= FileBuffer->getBufferSize())

- return None;

+ return std::nullopt;

const char *Data = FileBuffer->getBufferStart() + StrTabIdx;

unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx;

@@ -159,7 +160,7 @@ Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {

// Check whether the buffer is null-terminated.

if (Len == MaxLen && Data[Len - 1])

- return None;

+ return std::nullopt;

return StringRef(Data, Len);

}

@@ -177,7 +178,7 @@ LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {

<< ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n";

auto getStringOrInvalid = [this](unsigned Id) -> StringRef {

- if (Optional<StringRef> S = getString(Id))

+ if (std::optional<StringRef> S = getString(Id))

return *S;

return "<invalid>";

};

@@ -208,7 +209,7 @@ StringRef HeaderMapImpl::lookupFilename(StringRef Filename,

if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss.

// See if the key matches. If not, probe on.

- Optional<StringRef> Key = getString(B.Key);

+ std::optional<StringRef> Key = getString(B.Key);

if (LLVM_UNLIKELY(!Key))

continue;

if (!Filename.equals_insensitive(*Key))

@@ -216,8 +217,8 @@ StringRef HeaderMapImpl::lookupFilename(StringRef Filename,

// If so, we have a match in the hash table. Construct the destination

// path.

- Optional<StringRef> Prefix = getString(B.Prefix);

- Optional<StringRef> Suffix = getString(B.Suffix);

+ std::optional<StringRef> Prefix = getString(B.Prefix);

+ std::optional<StringRef> Suffix = getString(B.Suffix);

DestPath.clear();

if (LLVM_LIKELY(Prefix && Suffix)) {

@@ -240,9 +241,9 @@ StringRef HeaderMapImpl::reverseLookupFilename(StringRef DestPath) const {

if (B.Key == HMAP_EmptyBucketKey)

continue;

- Optional<StringRef> Key = getString(B.Key);

- Optional<StringRef> Prefix = getString(B.Prefix);

- Optional<StringRef> Suffix = getString(B.Suffix);

+ std::optional<StringRef> Key = getString(B.Key);

+ std::optional<StringRef> Prefix = getString(B.Prefix);

+ std::optional<StringRef> Suffix = getString(B.Suffix);

if (LLVM_LIKELY(Key && Prefix && Suffix)) {

SmallVector<char, 1024> Buf;

Buf.append(Prefix->begin(), Prefix->end());

diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 60fd42bc1127..074c147ba3c5 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp

@@ -116,6 +116,7 @@ void HeaderSearch::SetSearchPaths(

NoCurDirSearch = noCurDirSearch;

SearchDirToHSEntry = std::move(searchDirToHSEntry);

//LookupFileCache.clear();

+ indexInitialHeaderMaps();

}

void HeaderSearch::AddSearchPath(const DirectoryLookup &dir, bool isAngled) {

@@ -170,11 +171,11 @@ void HeaderSearch::getHeaderMapFileNames(

}

std::string HeaderSearch::getCachedModuleFileName(Module *Module) {

- const FileEntry *ModuleMap =

+ OptionalFileEntryRef ModuleMap =

getModuleMap().getModuleMapFileForUniquing(Module);

// The ModuleMap maybe a nullptr, when we load a cached C++ module without

// *.modulemap file. In this case, just return an empty string.

- if (ModuleMap == nullptr)

+ if (!ModuleMap)

return {};

return getCachedModuleFileName(Module->Name, ModuleMap->getName());

}

@@ -211,7 +212,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName,

}

std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) {

- const FileEntry *ModuleMap =

+ OptionalFileEntryRef ModuleMap =

getModuleMap().getModuleMapFileForUniquing(Module);

StringRef ModuleName = Module->Name;

StringRef ModuleMapPath = ModuleMap->getName();

@@ -255,18 +256,11 @@ std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName,

// To avoid false-negatives, we form as canonical a path as we can, and map

// to lower-case in case we're on a case-insensitive file system.

- std::string Parent =

- std::string(llvm::sys::path::parent_path(ModuleMapPath));

- if (Parent.empty())

- Parent = ".";

- auto Dir = FileMgr.getDirectory(Parent);

- if (!Dir)

+ SmallString<128> CanonicalPath(ModuleMapPath);

+ if (getModuleMap().canonicalizeModuleMapPath(CanonicalPath))

return {};

- auto DirName = FileMgr.getCanonicalName(*Dir);

- auto FileName = llvm::sys::path::filename(ModuleMapPath);

- llvm::hash_code Hash =

- llvm::hash_combine(DirName.lower(), FileName.lower());

+ llvm::hash_code Hash = llvm::hash_combine(CanonicalPath.str().lower());

SmallString<128> HashStr;

llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);

@@ -311,7 +305,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,

// Look through the various header search paths to load any available module

// maps, searching for a module map that describes this module.

- for (DirectoryLookup Dir : search_dir_range()) {

+ for (DirectoryLookup &Dir : search_dir_range()) {

if (Dir.isFramework()) {

// Search for or infer a module map for a framework. Here we use

// SearchName rather than ModuleName, to permit finding private modules

@@ -335,7 +329,8 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,

continue;

bool IsSystem = Dir.isSystemHeaderDirectory();

- // Only returns None if not a normal directory, which we just checked

+ // Only returns std::nullopt if not a normal directory, which we just

+ // checked

DirectoryEntryRef NormalDir = *Dir.getDirRef();

// Search for a module map file in this directory.

if (loadModuleMapFile(NormalDir, IsSystem,

@@ -379,6 +374,31 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,

return Module;

}

+void HeaderSearch::indexInitialHeaderMaps() {

+ llvm::StringMap<unsigned, llvm::BumpPtrAllocator> Index(SearchDirs.size());

+ // Iterate over all filename keys and associate them with the index i.

+ unsigned i = 0;

+ for (; i != SearchDirs.size(); ++i) {

+ auto &Dir = SearchDirs[i];

+ // We're concerned with only the initial contiguous run of header

+ // maps within SearchDirs, which can be 99% of SearchDirs when

+ // SearchDirs.size() is ~10000.

+ if (!Dir.isHeaderMap())

+ break;

+ // Give earlier keys precedence over identical later keys.

+ auto Callback = [&](StringRef Filename) {

+ Index.try_emplace(Filename.lower(), i);

+ };

+ Dir.getHeaderMap()->forEachKey(Callback);

+ }

+ SearchDirHeaderMapIndex = std::move(Index);

+ FirstNonHeaderMapSearchDirIdx = i;

+}

//===----------------------------------------------------------------------===//

// File lookup within a DirectoryLookup scope

//===----------------------------------------------------------------------===//

@@ -395,13 +415,14 @@ StringRef DirectoryLookup::getName() const {

return getHeaderMap()->getFileName();

}

-Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule(

+OptionalFileEntryRef HeaderSearch::getFileAndSuggestModule(

StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir,

bool IsSystemHeaderDir, Module *RequestingModule,

- ModuleMap::KnownHeader *SuggestedModule) {

+ ModuleMap::KnownHeader *SuggestedModule, bool OpenFile /*=true*/,

+ bool CacheFailures /*=true*/) {

// If we have a module map that might map this header, load it and

// check whether we'll have a suggestion for a module.

- auto File = getFileMgr().getFileRef(FileName, /*OpenFile=*/true);

+ auto File = getFileMgr().getFileRef(FileName, OpenFile, CacheFailures);

if (!File) {

// For rare, surprising errors (e.g. "out of file handles"), diag the EC

// message.

@@ -412,26 +433,27 @@ Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule(

Diags.Report(IncludeLoc, diag::err_cannot_open_file)

<< FileName << EC.message();

}

- return None;

+ return std::nullopt;

}

// If there is a module that corresponds to this header, suggest it.

if (!findUsableModuleForHeader(

&File->getFileEntry(), Dir ? Dir : File->getFileEntry().getDir(),

RequestingModule, SuggestedModule, IsSystemHeaderDir))

- return None;

+ return std::nullopt;

return *File;

}

/// LookupFile - Lookup the specified file in this search path, returning it

/// if it exists or returning null if not.

-Optional<FileEntryRef> DirectoryLookup::LookupFile(

+OptionalFileEntryRef DirectoryLookup::LookupFile(

StringRef &Filename, HeaderSearch &HS, SourceLocation IncludeLoc,

SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,

Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,

bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound,

- bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName) const {

+ bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName,

+ bool OpenFile) const {

InUserSpecifiedSystemFramework = false;

IsInHeaderMap = false;

MappedName.clear();

@@ -451,9 +473,9 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(

RelativePath->append(Filename.begin(), Filename.end());

}

- return HS.getFileAndSuggestModule(TmpDir, IncludeLoc, getDir(),

- isSystemHeaderDirectory(),

- RequestingModule, SuggestedModule);

+ return HS.getFileAndSuggestModule(

+ TmpDir, IncludeLoc, getDir(), isSystemHeaderDirectory(),

+ RequestingModule, SuggestedModule, OpenFile);

}

if (isFramework())

@@ -466,7 +488,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(

SmallString<1024> Path;

StringRef Dest = HM->lookupFilename(Filename, Path);

if (Dest.empty())

- return None;

+ return std::nullopt;

IsInHeaderMap = true;

@@ -491,7 +513,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(

Dest = HM->lookupFilename(Filename, Path);

}

- if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) {

+ if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest, OpenFile)) {

FixupSearchPath();

return *Res;

}

@@ -501,7 +523,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(

// function as part of the regular logic that applies to include search paths.

// The case where the target file **does not exist** is handled here:

HS.noteLookupUsage(HS.searchDirIdx(*this), IncludeLoc);

- return None;

+ return std::nullopt;

}

/// Given a framework directory, find the top-most framework directory.

@@ -510,7 +532,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(

/// \param DirName The name of the framework directory.

/// \param SubmodulePath Will be populated with the submodule path from the

/// returned top-level module to the originally named framework.

-static Optional<DirectoryEntryRef>

+static OptionalDirectoryEntryRef

getTopFrameworkDir(FileManager &FileMgr, StringRef DirName,

SmallVectorImpl<std::string> &SubmodulePath) {

assert(llvm::sys::path::extension(DirName) == ".framework" &&

@@ -564,7 +586,7 @@ static bool needModuleLookup(Module *RequestingModule,

/// DoFrameworkLookup - Do a lookup of the specified file in the current

/// DirectoryLookup, which is a framework directory.

-Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(

+OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup(

StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath,

SmallVectorImpl<char> *RelativePath, Module *RequestingModule,

ModuleMap::KnownHeader *SuggestedModule,

@@ -574,7 +596,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(

// Framework names must have a '/' in the filename.

size_t SlashPos = Filename.find('/');

if (SlashPos == StringRef::npos)

- return None;

+ return std::nullopt;

// Find out if this is the home for the specified framework, by checking

// HeaderSearch. Possible answers are yes/no and unknown.

@@ -583,7 +605,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(

// If it is known and in some other directory, fail.

if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDirRef())

- return None;

+ return std::nullopt;

// Otherwise, construct the path to this framework dir.

@@ -607,7 +629,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(

// If the framework dir doesn't exist, we fail.

auto Dir = FileMgr.getDirectory(FrameworkName);

if (!Dir)

- return None;

+ return std::nullopt;

// Otherwise, if it does, remember that this is the right direntry for this

// framework.

@@ -690,17 +712,17 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(

if (!HS.findUsableModuleForFrameworkHeader(

&File->getFileEntry(), FrameworkPath, RequestingModule,

SuggestedModule, IsSystem))

- return None;

+ return std::nullopt;

} else {

if (!HS.findUsableModuleForHeader(&File->getFileEntry(), getDir(),

RequestingModule, SuggestedModule,

IsSystem))

- return None;

+ return std::nullopt;

}

if (File)

return *File;

- return None;

+ return std::nullopt;

}

void HeaderSearch::cacheLookupSuccess(LookupFileCacheInfo &CacheLookup,

@@ -833,14 +855,14 @@ diagnoseFrameworkInclude(DiagnosticsEngine &Diags, SourceLocation IncludeLoc,

/// for system \#include's or not (i.e. using <> instead of ""). Includers, if

/// non-empty, indicates where the \#including file(s) are, in case a relative

/// search is needed. Microsoft mode will pass all \#including files.

-Optional<FileEntryRef> HeaderSearch::LookupFile(

+OptionalFileEntryRef HeaderSearch::LookupFile(

StringRef Filename, SourceLocation IncludeLoc, bool isAngled,

ConstSearchDirIterator FromDir, ConstSearchDirIterator *CurDirArg,

ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers,

SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,

Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,

bool *IsMapped, bool *IsFrameworkFound, bool SkipCache,

- bool BuildSystemModule) {

+ bool BuildSystemModule, bool OpenFile, bool CacheFailures) {

ConstSearchDirIterator CurDirLocal = nullptr;

ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;

@@ -859,7 +881,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

// If this was an #include_next "/absolute/file", fail.

if (FromDir)

- return None;

+ return std::nullopt;

if (SearchPath)

SearchPath->clear();

@@ -869,13 +891,14 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

}

// Otherwise, just return the file.

return getFileAndSuggestModule(Filename, IncludeLoc, nullptr,

- /*IsSystemHeaderDir*/false,

- RequestingModule, SuggestedModule);

+ /*IsSystemHeaderDir*/ false,

+ RequestingModule, SuggestedModule, OpenFile,

+ CacheFailures);

}

// This is the header that MSVC's header search would have found.

ModuleMap::KnownHeader MSSuggestedModule;

- Optional<FileEntryRef> MSFE;

+ OptionalFileEntryRef MSFE;

// Unless disabled, check to see if the file is in the #includer's

// directory. This cannot be based on CurDir, because each includer could be

@@ -904,7 +927,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

bool IncluderIsSystemHeader =

Includer ? getFileInfo(Includer).DirInfo != SrcMgr::C_User :

BuildSystemModule;

- if (Optional<FileEntryRef> FE = getFileAndSuggestModule(

+ if (OptionalFileEntryRef FE = getFileAndSuggestModule(

TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,

RequestingModule, SuggestedModule)) {

if (!Includer) {

@@ -981,24 +1004,37 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

ConstSearchDirIterator NextIt = std::next(It);

- // If the entry has been previously looked up, the first value will be

- // non-zero. If the value is equal to i (the start point of our search), then

- // this is a matching hit.

- if (!SkipCache && CacheLookup.StartIt == NextIt) {

- // Skip querying potentially lots of directories for this lookup.

- if (CacheLookup.HitIt)

- It = CacheLookup.HitIt;

- if (CacheLookup.MappedName) {

- Filename = CacheLookup.MappedName;

- if (IsMapped)

- *IsMapped = true;

+ if (!SkipCache) {

+ if (CacheLookup.StartIt == NextIt) {

+ // HIT: Skip querying potentially lots of directories for this lookup.

+ if (CacheLookup.HitIt)

+ It = CacheLookup.HitIt;

+ if (CacheLookup.MappedName) {

+ Filename = CacheLookup.MappedName;

+ if (IsMapped)

+ *IsMapped = true;

+ }

+ } else {

+ // MISS: This is the first query, or the previous query didn't match

+ // our search start. We will fill in our found location below, so prime

+ // the start point value.

+ CacheLookup.reset(/*NewStartIt=*/NextIt);

+ if (It == search_dir_begin() && FirstNonHeaderMapSearchDirIdx > 0) {

+ // Handle cold misses of user includes in the presence of many header

+ // maps. We avoid searching perhaps thousands of header maps by

+ // jumping directly to the correct one or jumping beyond all of them.

+ auto Iter = SearchDirHeaderMapIndex.find(Filename.lower());

+ if (Iter == SearchDirHeaderMapIndex.end())

+ // Not in index => Skip to first SearchDir after initial header maps

+ It = search_dir_nth(FirstNonHeaderMapSearchDirIdx);

+ else

+ // In index => Start with a specific header map

+ It = search_dir_nth(Iter->second);

+ }

}

- } else {

- // Otherwise, this is the first query, or the previous query didn't match

- // our search start. We will fill in our found location below, so prime the

- // start point value.

+ } else

CacheLookup.reset(/*NewStartIt=*/NextIt);

- }

SmallString<64> MappedName;

@@ -1007,10 +1043,10 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

bool InUserSpecifiedSystemFramework = false;

bool IsInHeaderMap = false;

bool IsFrameworkFoundInDir = false;

- Optional<FileEntryRef> File = It->LookupFile(

+ OptionalFileEntryRef File = It->LookupFile(

Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule,

SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir,

- IsInHeaderMap, MappedName);

+ IsInHeaderMap, MappedName, OpenFile);

if (!MappedName.empty()) {

assert(IsInHeaderMap && "MappedName should come from a header map");

CacheLookup.MappedName =

@@ -1102,7 +1138,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

ScratchFilename += '/';

ScratchFilename += Filename;

- Optional<FileEntryRef> File = LookupFile(

+ OptionalFileEntryRef File = LookupFile(

ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, &CurDir,

Includers.front(), SearchPath, RelativePath, RequestingModule,

SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr);

@@ -1131,7 +1167,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

// Otherwise, didn't find it. Remember we didn't find this.

CacheLookup.HitIt = search_dir_end();

- return None;

+ return std::nullopt;

}

/// LookupSubframeworkHeader - Look up a subframework for the specified

@@ -1139,7 +1175,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(

/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox

/// is a subframework within Carbon.framework. If so, return the FileEntry

/// for the designated file, otherwise return null.

-Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

+OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(

StringRef Filename, const FileEntry *ContextFileEnt,

SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,

Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) {

@@ -1149,7 +1185,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

// FIXME: Should we permit '\' on Windows?

size_t SlashPos = Filename.find('/');

if (SlashPos == StringRef::npos)

- return None;

+ return std::nullopt;

// Look up the base framework name of the ContextFileEnt.

StringRef ContextName = ContextFileEnt->getName();

@@ -1160,7 +1196,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

if (FrameworkPos == StringRef::npos ||

(ContextName[FrameworkPos + DotFrameworkLen] != '/' &&

ContextName[FrameworkPos + DotFrameworkLen] != '\\'))

- return None;

+ return std::nullopt;

SmallString<1024> FrameworkName(ContextName.data(), ContextName.data() +

FrameworkPos +

@@ -1180,7 +1216,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

CacheLookup.first().size() == FrameworkName.size() &&

memcmp(CacheLookup.first().data(), &FrameworkName[0],

CacheLookup.first().size()) != 0)

- return None;

+ return std::nullopt;

// Cache subframework.

if (!CacheLookup.second.Directory) {

@@ -1189,7 +1225,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

// If the framework dir doesn't exist, we fail.

auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName);

if (!Dir)

- return None;

+ return std::nullopt;

// Otherwise, if it does, remember that this is the right direntry for this

// framework.

@@ -1227,7 +1263,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true);

if (!File)

- return None;

+ return std::nullopt;

}

// This file is a system header or C++ unfriendly if the old file is.

@@ -1242,7 +1278,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader(

if (!findUsableModuleForFrameworkHeader(&File->getFileEntry(), FrameworkName,

RequestingModule, SuggestedModule,

/*IsSystem*/ false))

- return None;

+ return std::nullopt;

return *File;

}

@@ -1348,7 +1384,7 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) {

void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE,

ModuleMap::ModuleHeaderRole Role,

bool isCompilingModuleHeader) {

- bool isModularHeader = !(Role & ModuleMap::TextualHeader);

+ bool isModularHeader = ModuleMap::isModular(Role);

// Don't mark the file info as non-external if there's nothing to change.

if (!isCompilingModuleHeader) {

@@ -1519,14 +1555,14 @@ bool HeaderSearch::hasModuleMap(StringRef FileName,

}

ModuleMap::KnownHeader

-HeaderSearch::findModuleForHeader(const FileEntry *File,

- bool AllowTextual) const {

+HeaderSearch::findModuleForHeader(const FileEntry *File, bool AllowTextual,

+ bool AllowExcluded) const {

if (ExternalSource) {

// Make sure the external source has handled header info about this file,

// which includes whether the file is part of a module.

(void)getExistingFileInfo(File);

}

- return ModMap.findModuleForHeader(File, AllowTextual);

+ return ModMap.findModuleForHeader(File, AllowTextual, AllowExcluded);

}

ArrayRef<ModuleMap::KnownHeader>

@@ -1560,6 +1596,8 @@ static bool suggestModule(HeaderSearch &HS, const FileEntry *File,

*SuggestedModule = ModuleMap::KnownHeader();

return true;

}

+ // TODO: Add this module (or just its module map file) into something like

+ // `RequestingModule->AffectingClangModules`.

return false;

}

@@ -1590,7 +1628,7 @@ bool HeaderSearch::findUsableModuleForFrameworkHeader(

if (needModuleLookup(RequestingModule, SuggestedModule)) {

// Find the top-level framework based on this framework.

SmallVector<std::string, 4> SubmodulePath;

- Optional<DirectoryEntryRef> TopFrameworkDir =

+ OptionalDirectoryEntryRef TopFrameworkDir =

::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath);

assert(TopFrameworkDir && "Could not find the top-most framework dir");

@@ -1630,7 +1668,7 @@ bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem,

StringRef OriginalModuleMapFile) {

// Find the directory for the module. For frameworks, that may require going

// up from the 'Modules' directory.

- Optional<DirectoryEntryRef> Dir;

+ OptionalDirectoryEntryRef Dir;

if (getHeaderSearchOpts().ModuleMapFileHomeIsCwd) {

Dir = FileMgr.getOptionalDirectoryRef(".");

} else {

@@ -1891,32 +1929,28 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics(

llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile,

bool *IsSystem) {

using namespace llvm::sys;

+ llvm::SmallString<32> FilePath = File;

+ // remove_dots switches to backslashes on windows as a side-effect!

+ // We always want to suggest forward slashes for includes.

+ // (not remove_dots(..., posix) as that misparses windows paths).

+ path::remove_dots(FilePath, /*remove_dot_dot=*/true);

+ path::native(FilePath, path::Style::posix);

+ File = FilePath;

unsigned BestPrefixLength = 0;

// Checks whether `Dir` is a strict path prefix of `File`. If so and that's

// the longest prefix we've seen so far, returns true and updates the

// `BestPrefixLength` accordingly.

- auto CheckDir = [&](llvm::StringRef Dir) -> bool {

- llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());

+ auto CheckDir = [&](llvm::SmallString<32> Dir) -> bool {

if (!WorkingDir.empty() && !path::is_absolute(Dir))

- fs::make_absolute(WorkingDir, DirPath);

- path::remove_dots(DirPath, /*remove_dot_dot=*/true);

- Dir = DirPath;

+ fs::make_absolute(WorkingDir, Dir);

+ path::remove_dots(Dir, /*remove_dot_dot=*/true);

for (auto NI = path::begin(File), NE = path::end(File),

DI = path::begin(Dir), DE = path::end(Dir);

- /*termination condition in loop*/; ++NI, ++DI) {

- // '.' components in File are ignored.

- while (NI != NE && *NI == ".")

- ++NI;

- if (NI == NE)

- break;

- // '.' components in Dir are ignored.

- while (DI != DE && *DI == ".")

- ++DI;

+ NI != NE; ++NI, ++DI) {

if (DI == DE) {

- // Dir is a prefix of File, up to '.' components and choice of path

- // separators.

+ // Dir is a prefix of File, up to choice of path separators.

unsigned PrefixLength = NI - path::begin(File);

if (PrefixLength > BestPrefixLength) {

BestPrefixLength = PrefixLength;

diff --git a/clang/lib/Lex/InitHeaderSearch.cpp b/clang/lib/Lex/InitHeaderSearch.cpp
index 158b5667151f..d4465565718e 100644
--- a/clang/lib/Lex/InitHeaderSearch.cpp
+++ b/clang/lib/Lex/InitHeaderSearch.cpp

@@ -26,6 +26,7 @@

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/Path.h"

#include "llvm/Support/raw_ostream.h"

+#include <optional>

using namespace clang;

using namespace clang::frontend;

@@ -35,16 +36,16 @@ namespace {

struct DirectoryLookupInfo {

IncludeDirGroup Group;

DirectoryLookup Lookup;

- Optional<unsigned> UserEntryIdx;

+ std::optional<unsigned> UserEntryIdx;

DirectoryLookupInfo(IncludeDirGroup Group, DirectoryLookup Lookup,

- Optional<unsigned> UserEntryIdx)

+ std::optional<unsigned> UserEntryIdx)

: Group(Group), Lookup(Lookup), UserEntryIdx(UserEntryIdx) {}

};

-/// InitHeaderSearch - This class makes it easier to set the search paths of

-/// a HeaderSearch object. InitHeaderSearch stores several search path lists

-/// internally, which can be sent to a HeaderSearch object in one swoop.

+/// This class makes it easier to set the search paths of a HeaderSearch object.

+/// InitHeaderSearch stores several search path lists internally, which can be

+/// sent to a HeaderSearch object in one swoop.

class InitHeaderSearch {

std::vector<DirectoryLookupInfo> IncludePath;

std::vector<std::pair<std::string, bool> > SystemHeaderPrefixes;

@@ -58,56 +59,54 @@ public:

: Headers(HS), Verbose(verbose), IncludeSysroot(std::string(sysroot)),

HasSysroot(!(sysroot.empty() || sysroot == "/")) {}

- /// AddPath - Add the specified path to the specified group list, prefixing

- /// the sysroot if used.

+ /// Add the specified path to the specified group list, prefixing the sysroot

+ /// if used.

/// Returns true if the path exists, false if it was ignored.

bool AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework,

- Optional<unsigned> UserEntryIdx = None);

+ std::optional<unsigned> UserEntryIdx = std::nullopt);

- /// AddUnmappedPath - Add the specified path to the specified group list,

- /// without performing any sysroot remapping.

+ /// Add the specified path to the specified group list, without performing any

+ /// sysroot remapping.

/// Returns true if the path exists, false if it was ignored.

bool AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,

bool isFramework,

- Optional<unsigned> UserEntryIdx = None);

+ std::optional<unsigned> UserEntryIdx = std::nullopt);

- /// AddSystemHeaderPrefix - Add the specified prefix to the system header

- /// prefix list.

+ /// Add the specified prefix to the system header prefix list.

void AddSystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader) {

SystemHeaderPrefixes.emplace_back(std::string(Prefix), IsSystemHeader);

}

- /// AddGnuCPlusPlusIncludePaths - Add the necessary paths to support a gnu

- /// libstdc++.

+ /// Add the necessary paths to support a gnu libstdc++.

/// Returns true if the \p Base path was found, false if it does not exist.

bool AddGnuCPlusPlusIncludePaths(StringRef Base, StringRef ArchDir,

StringRef Dir32, StringRef Dir64,

const llvm::Triple &triple);

- /// AddMinGWCPlusPlusIncludePaths - Add the necessary paths to support a MinGW

- /// libstdc++.

+ /// Add the necessary paths to support a MinGW libstdc++.

void AddMinGWCPlusPlusIncludePaths(StringRef Base,

StringRef Arch,

StringRef Version);

- // AddDefaultCIncludePaths - Add paths that should always be searched.

+ /// Add paths that should always be searched.

void AddDefaultCIncludePaths(const llvm::Triple &triple,

const HeaderSearchOptions &HSOpts);

- // AddDefaultCPlusPlusIncludePaths - Add paths that should be searched when

- // compiling c++.

+ /// Add paths that should be searched when compiling c++.

void AddDefaultCPlusPlusIncludePaths(const LangOptions &LangOpts,

const llvm::Triple &triple,

const HeaderSearchOptions &HSOpts);

- /// AddDefaultSystemIncludePaths - Adds the default system include paths so

- /// that e.g. stdio.h is found.

+ /// Returns true iff AddDefaultIncludePaths should do anything. If this

+ /// returns false, include paths should instead be handled in the driver.

+ bool ShouldAddDefaultIncludePaths(const llvm::Triple &triple);

+ /// Adds the default system include paths so that e.g. stdio.h is found.

void AddDefaultIncludePaths(const LangOptions &Lang,

const llvm::Triple &triple,

const HeaderSearchOptions &HSOpts);

- /// Realize - Merges all search path lists into one list and send it to

- /// HeaderSearch.

+ /// Merges all search path lists into one list and sends it to HeaderSearch.

void Realize(const LangOptions &Lang);

};

@@ -123,7 +122,7 @@ static bool CanPrefixSysroot(StringRef Path) {

bool InitHeaderSearch::AddPath(const Twine &Path, IncludeDirGroup Group,

bool isFramework,

- Optional<unsigned> UserEntryIdx) {

+ std::optional<unsigned> UserEntryIdx) {

// Add the path with sysroot prepended, if desired and this is a system header

// group.

if (HasSysroot) {

@@ -140,7 +139,7 @@ bool InitHeaderSearch::AddPath(const Twine &Path, IncludeDirGroup Group,

bool InitHeaderSearch::AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,

bool isFramework,

- Optional<unsigned> UserEntryIdx) {

+ std::optional<unsigned> UserEntryIdx) {

assert(!Path.isTriviallyEmpty() && "can't handle empty path here");

FileManager &FM = Headers.getFileMgr();

@@ -225,28 +224,23 @@ void InitHeaderSearch::AddMinGWCPlusPlusIncludePaths(StringRef Base,

void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,

const HeaderSearchOptions &HSOpts) {

- llvm::Triple::OSType os = triple.getOS();

- if (triple.isOSDarwin()) {

+ if (!ShouldAddDefaultIncludePaths(triple))

llvm_unreachable("Include management is handled in the driver.");

- }

+ llvm::Triple::OSType os = triple.getOS();

if (HSOpts.UseStandardSystemIncludes) {

switch (os) {

case llvm::Triple::CloudABI:

- case llvm::Triple::FreeBSD:

- case llvm::Triple::NetBSD:

- case llvm::Triple::OpenBSD:

case llvm::Triple::NaCl:

case llvm::Triple::PS4:

case llvm::Triple::PS5:

case llvm::Triple::ELFIAMCU:

- case llvm::Triple::Fuchsia:

break;

case llvm::Triple::Win32:

if (triple.getEnvironment() != llvm::Triple::Cygnus)

break;

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

default:

// FIXME: temporary hack: hard-coded paths.

AddPath("/usr/local/include", System, false);

@@ -280,12 +274,6 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,

}

switch (os) {

- case llvm::Triple::Linux:

- case llvm::Triple::Hurd:

- case llvm::Triple::Solaris:

- case llvm::Triple::OpenBSD:

- llvm_unreachable("Include management is handled in the driver.");

case llvm::Triple::CloudABI: {

// <sysroot>/<triple>/include

SmallString<128> P = StringRef(HSOpts.ResourceDir);

@@ -350,7 +338,6 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,

case llvm::Triple::RTEMS:

case llvm::Triple::NaCl:

case llvm::Triple::ELFIAMCU:

- case llvm::Triple::Fuchsia:

break;

case llvm::Triple::PS4:

case llvm::Triple::PS5: {

@@ -386,20 +373,12 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,

void InitHeaderSearch::AddDefaultCPlusPlusIncludePaths(

const LangOptions &LangOpts, const llvm::Triple &triple,

const HeaderSearchOptions &HSOpts) {

- llvm::Triple::OSType os = triple.getOS();

- // FIXME: temporary hack: hard-coded paths.

- if (triple.isOSDarwin()) {

+ if (!ShouldAddDefaultIncludePaths(triple))

llvm_unreachable("Include management is handled in the driver.");

- }

+ // FIXME: temporary hack: hard-coded paths.

+ llvm::Triple::OSType os = triple.getOS();

switch (os) {

- case llvm::Triple::Linux:

- case llvm::Triple::Hurd:

- case llvm::Triple::Solaris:

- case llvm::Triple::AIX:

- llvm_unreachable("Include management is handled in the driver.");

- break;

case llvm::Triple::Win32:

switch (triple.getEnvironment()) {

default: llvm_unreachable("Include management is handled in the driver.");

@@ -425,44 +404,61 @@ void InitHeaderSearch::AddDefaultCPlusPlusIncludePaths(

}

-void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang,

- const llvm::Triple &triple,

- const HeaderSearchOptions &HSOpts) {

- // NB: This code path is going away. All of the logic is moving into the

- // driver which has the information necessary to do target-specific

- // selections of default include paths. Each target which moves there will be

- // exempted from this logic here until we can delete the entire pile of code.

+bool InitHeaderSearch::ShouldAddDefaultIncludePaths(

+ const llvm::Triple &triple) {

switch (triple.getOS()) {

- default:

- break; // Everything else continues to use this routine's logic.

+ case llvm::Triple::AIX:

case llvm::Triple::Emscripten:

- case llvm::Triple::Linux:

- case llvm::Triple::Hurd:

+ case llvm::Triple::FreeBSD:

+ case llvm::Triple::NetBSD:

case llvm::Triple::OpenBSD:

+ case llvm::Triple::Fuchsia:

+ case llvm::Triple::Hurd:

+ case llvm::Triple::Linux:

case llvm::Triple::Solaris:

case llvm::Triple::WASI:

- case llvm::Triple::AIX:

- return;

+ return false;

case llvm::Triple::Win32:

if (triple.getEnvironment() != llvm::Triple::Cygnus ||

triple.isOSBinFormatMachO())

- return;

+ return false;

break;

case llvm::Triple::UnknownOS:

if (triple.isWasm())

- return;

+ return false;

+ break;

+ default:

break;

}

- // All header search logic is handled in the Driver for Darwin.

+ return true; // Everything else uses AddDefaultIncludePaths().

+}

+void InitHeaderSearch::AddDefaultIncludePaths(

+ const LangOptions &Lang, const llvm::Triple &triple,

+ const HeaderSearchOptions &HSOpts) {

+ // NB: This code path is going away. All of the logic is moving into the

+ // driver which has the information necessary to do target-specific

+ // selections of default include paths. Each target which moves there will be

+ // exempted from this logic in ShouldAddDefaultIncludePaths() until we can

+ // delete the entire pile of code.

+ if (!ShouldAddDefaultIncludePaths(triple))

+ return;

+ // NOTE: some additional header search logic is handled in the driver for

+ // Darwin.

if (triple.isOSDarwin()) {

if (HSOpts.UseStandardSystemIncludes) {

// Add the default framework include paths on Darwin.

- AddPath("/System/Library/Frameworks", System, true);

- AddPath("/Library/Frameworks", System, true);

+ if (triple.isDriverKit()) {

+ AddPath("/System/DriverKit/System/Library/Frameworks", System, true);

+ } else {

+ AddPath("/System/Library/Frameworks", System, true);

+ AddPath("/Library/Frameworks", System, true);

+ }

}

return;

}

@@ -479,9 +475,9 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang,

AddDefaultCIncludePaths(triple, HSOpts);

}

-/// RemoveDuplicates - If there are duplicate directory entries in the specified

-/// search list, remove the later (dead) ones. Returns the number of non-system

-/// headers removed, which is used to update NumAngled.

+/// If there are duplicate directory entries in the specified search list,

+/// remove the later (dead) ones. Returns the number of non-system headers

+/// removed, which is used to update NumAngled.

static unsigned RemoveDuplicates(std::vector<DirectoryLookupInfo> &SearchList,

unsigned First, bool Verbose) {

llvm::SmallPtrSet<const DirectoryEntry *, 8> SeenDirs;

diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index a4cff403e739..d49d9e9e4b14 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp

@@ -26,8 +26,6 @@

#include "clang/Lex/Preprocessor.h"

#include "clang/Lex/PreprocessorOptions.h"

#include "clang/Lex/Token.h"

-#include "llvm/ADT/None.h"

-#include "llvm/ADT/Optional.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/StringExtras.h"

#include "llvm/ADT/StringRef.h"

@@ -44,6 +42,7 @@

#include <cstddef>

#include <cstdint>

#include <cstring>

+#include <optional>

#include <string>

#include <tuple>

#include <utility>

@@ -1048,9 +1047,11 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics(

while (SM.isMacroArgExpansion(Loc))

Loc = SM.getImmediateExpansionRange(Loc).getBegin();

- // If the macro's spelling has no FileID, then it's actually a token paste

- // or stringization (or similar) and not a macro at all.

- if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))

+ // If the macro's spelling isn't FileID or from scratch space, then it's

+ // actually a token paste or stringization (or similar) and not a macro at

+ // all.

+ SourceLocation SpellLoc = SM.getSpellingLoc(Loc);

+ if (!SpellLoc.isFileID() || SM.isWrittenInScratchSpace(SpellLoc))

return {};

// Find the spelling location of the start of the non-argument expansion

@@ -1195,15 +1196,16 @@ static char GetTrigraphCharForLetter(char Letter) {

/// whether trigraphs are enabled or not.

static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) {

char Res = GetTrigraphCharForLetter(*CP);

- if (!Res || !L) return Res;

+ if (!Res)

+ return Res;

if (!Trigraphs) {

- if (!L->isLexingRawMode())

+ if (L && !L->isLexingRawMode())

L->Diag(CP-2, diag::trigraph_ignored);

return 0;

}

- if (!L->isLexingRawMode())

+ if (L && !L->isLexingRawMode())

L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);

return Res;

}

@@ -1256,12 +1258,12 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {

}

-Optional<Token> Lexer::findNextToken(SourceLocation Loc,

- const SourceManager &SM,

- const LangOptions &LangOpts) {

+std::optional<Token> Lexer::findNextToken(SourceLocation Loc,

+ const SourceManager &SM,

+ const LangOptions &LangOpts) {

if (Loc.isMacroID()) {

if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))

- return None;

+ return std::nullopt;

}

Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);

@@ -1272,7 +1274,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc,

bool InvalidTemp = false;

StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);

if (InvalidTemp)

- return None;

+ return std::nullopt;

const char *TokenBegin = File.data() + LocInfo.second;

@@ -1292,7 +1294,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc,

SourceLocation Lexer::findLocationAfterToken(

SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,

const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) {

- Optional<Token> Tok = findNextToken(Loc, SM, LangOpts);

+ std::optional<Token> Tok = findNextToken(Loc, SM, LangOpts);

if (!Tok || Tok->isNot(TKind))

return {};

SourceLocation TokenLoc = Tok->getLocation();

@@ -1457,7 +1459,35 @@ static bool isUnicodeWhitespace(uint32_t Codepoint) {

return UnicodeWhitespaceChars.contains(Codepoint);

}

-static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {

+static llvm::SmallString<5> codepointAsHexString(uint32_t C) {

+ llvm::SmallString<5> CharBuf;

+ llvm::raw_svector_ostream CharOS(CharBuf);

+ llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);

+ return CharBuf;

+}

+// To mitigate https://github.com/llvm/llvm-project/issues/54732,

+// we allow "Mathematical Notation Characters" in identifiers.

+// This is a proposed profile that extends the XID_Start/XID_continue

+// with mathematical symbols, superscripts and subscript digits

+// found in some production software.

+// https://www.unicode.org/L2/L2022/22230-math-profile.pdf

+static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts,

+ bool IsStart, bool &IsExtension) {

+ static const llvm::sys::UnicodeCharSet MathStartChars(

+ MathematicalNotationProfileIDStartRanges);

+ static const llvm::sys::UnicodeCharSet MathContinueChars(

+ MathematicalNotationProfileIDContinueRanges);

+ if (MathStartChars.contains(C) ||

+ (!IsStart && MathContinueChars.contains(C))) {

+ IsExtension = true;

+ return true;

+ }

+ return false;

+}

+static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts,

+ bool &IsExtension) {

if (LangOpts.AsmPreprocessor) {

return false;

} else if (LangOpts.DollarIdents && '$' == C) {

@@ -1469,8 +1499,10 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {

// '_' doesn't have the XID_Continue property but is allowed in C and C++.

static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);

static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges);

- return C == '_' || XIDStartChars.contains(C) ||

- XIDContinueChars.contains(C);

+ if (C == '_' || XIDStartChars.contains(C) || XIDContinueChars.contains(C))

+ return true;

+ return isMathematicalExtensionID(C, LangOpts, /*IsStart=*/false,

+ IsExtension);

} else if (LangOpts.C11) {

static const llvm::sys::UnicodeCharSet C11AllowedIDChars(

C11AllowedIDCharRanges);

@@ -1482,16 +1514,21 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {

}

-static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {

+static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts,

+ bool &IsExtension) {

+ assert(C > 0x7F && "isAllowedInitiallyIDChar called with an ASCII codepoint");

+ IsExtension = false;

if (LangOpts.AsmPreprocessor) {

return false;

}

if (LangOpts.CPlusPlus || LangOpts.C2x) {

static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);

- // '_' doesn't have the XID_Start property but is allowed in C++.

- return C == '_' || XIDStartChars.contains(C);

+ if (XIDStartChars.contains(C))

+ return true;

+ return isMathematicalExtensionID(C, LangOpts, /*IsStart=*/true,

+ IsExtension);

}

- if (!isAllowedIDChar(C, LangOpts))

+ if (!isAllowedIDChar(C, LangOpts, IsExtension))

return false;

if (LangOpts.C11) {

static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(

@@ -1503,6 +1540,22 @@ static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {

return !C99DisallowedInitialIDChars.contains(C);

}

+static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C,

+ CharSourceRange Range) {

+ static const llvm::sys::UnicodeCharSet MathStartChars(

+ MathematicalNotationProfileIDStartRanges);

+ static const llvm::sys::UnicodeCharSet MathContinueChars(

+ MathematicalNotationProfileIDContinueRanges);

+ (void)MathStartChars;

+ (void)MathContinueChars;

+ assert((MathStartChars.contains(C) || MathContinueChars.contains(C)) &&

+ "Unexpected mathematical notation codepoint");

+ Diags.Report(Range.getBegin(), diag::ext_mathematical_notation)

+ << codepointAsHexString(C) << Range;

+}

static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,

const char *End) {

return CharSourceRange::getCharRange(L.getSourceLocation(Begin),

@@ -1602,18 +1655,13 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,

std::lower_bound(std::begin(SortedHomoglyphs),

std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'});

if (Homoglyph->Character == C) {

- llvm::SmallString<5> CharBuf;

- {

- llvm::raw_svector_ostream CharOS(CharBuf);

- llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);

- }

if (Homoglyph->LooksLike) {

const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};

Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)

- << Range << CharBuf << LooksLikeStr;

+ << Range << codepointAsHexString(C) << LooksLikeStr;

} else {

Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)

- << Range << CharBuf;

+ << Range << codepointAsHexString(C);

}

@@ -1624,25 +1672,24 @@ static void diagnoseInvalidUnicodeCodepointInIdentifier(

if (isASCII(CodePoint))

return;

- bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts);

- bool IsIDContinue = IsIDStart || isAllowedIDChar(CodePoint, LangOpts);

+ bool IsExtension;

+ bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts, IsExtension);

+ bool IsIDContinue =

+ IsIDStart || isAllowedIDChar(CodePoint, LangOpts, IsExtension);

if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))

return;

bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;

- llvm::SmallString<5> CharBuf;

- llvm::raw_svector_ostream CharOS(CharBuf);

- llvm::write_hex(CharOS, CodePoint, llvm::HexPrintStyle::Upper, 4);

if (!IsFirst || InvalidOnlyAtStart) {

Diags.Report(Range.getBegin(), diag::err_character_not_allowed_identifier)

- << Range << CharBuf << int(InvalidOnlyAtStart)

+ << Range << codepointAsHexString(CodePoint) << int(InvalidOnlyAtStart)

<< FixItHint::CreateRemoval(Range);

} else {

Diags.Report(Range.getBegin(), diag::err_character_not_allowed)

- << Range << CharBuf << FixItHint::CreateRemoval(Range);

+ << Range << codepointAsHexString(CodePoint)

+ << FixItHint::CreateRemoval(Range);

}

@@ -1653,8 +1700,8 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

if (CodePoint == 0) {

return false;

}

- if (!isAllowedIDChar(CodePoint, LangOpts)) {

+ bool IsExtension = false;

+ if (!isAllowedIDChar(CodePoint, LangOpts, IsExtension)) {

if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint))

return false;

if (!isLexingRawMode() && !ParsingPreprocessorDirective &&

@@ -1667,10 +1714,15 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

// We got a unicode codepoint that is neither a space nor a

// valid identifier part.

// Carry on as if the codepoint was valid for recovery purposes.

- } else if (!isLexingRawMode())

+ } else if (!isLexingRawMode()) {

+ if (IsExtension)

+ diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint,

+ makeCharRange(*this, CurPtr, UCNPtr));

maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,

makeCharRange(*this, CurPtr, UCNPtr),

/*IsFirst=*/false);

+ }

Result.setFlag(Token::HasUCN);

if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||

@@ -1693,7 +1745,9 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {

if (Result != llvm::conversionOK)

return false;

- if (!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) {

+ bool IsExtension = false;

+ if (!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts,

+ IsExtension)) {

if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint))

return false;

@@ -1706,6 +1760,9 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {

// a valid identifier part. Carry on as if the codepoint was

// valid for recovery purposes.

} else if (!isLexingRawMode()) {

+ if (IsExtension)

+ diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint,

+ makeCharRange(*this, CurPtr, UnicodePtr));

maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,

makeCharRange(*this, CurPtr, UnicodePtr),

/*IsFirst=*/false);

@@ -1719,9 +1776,13 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {

bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,

const char *CurPtr) {

- if (isAllowedInitiallyIDChar(C, LangOpts)) {

+ bool IsExtension = false;

+ if (isAllowedInitiallyIDChar(C, LangOpts, IsExtension)) {

if (!isLexingRawMode() && !ParsingPreprocessorDirective &&

!PP->isPreprocessedOutput()) {

+ if (IsExtension)

+ diagnoseExtensionInIdentifier(PP->getDiagnostics(), C,

+ makeCharRange(*this, BufferPtr, CurPtr));

maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,

makeCharRange(*this, BufferPtr, CurPtr),

/*IsFirst=*/true);

@@ -1735,7 +1796,7 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,

if (!isLexingRawMode() && !ParsingPreprocessorDirective &&

!PP->isPreprocessedOutput() && !isASCII(*BufferPtr) &&

- !isAllowedInitiallyIDChar(C, LangOpts) && !isUnicodeWhitespace(C)) {

+ !isUnicodeWhitespace(C)) {

// Non-ASCII characters tend to creep into source code unintentionally.

// Instead of letting the parser complain about the unknown token,

// just drop the character.

@@ -2905,7 +2966,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) {

break;

}

// FALL THROUGH.

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case '\r':

case '\n':

// Okay, we found the end of the line. First, back up past the \0, \r, \n.

@@ -3195,9 +3256,9 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {

return false;

}

-llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

- const char *SlashLoc,

- Token *Result) {

+std::optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

+ const char *SlashLoc,

+ Token *Result) {

unsigned CharSize;

char Kind = getCharAndSize(StartPtr, CharSize);

assert((Kind == 'u' || Kind == 'U') && "expected a UCN");

@@ -3216,7 +3277,7 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

if (!LangOpts.CPlusPlus && !LangOpts.C99) {

if (Diagnose)

Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);

- return llvm::None;

+ return std::nullopt;

}

const char *CurPtr = StartPtr + CharSize;

@@ -3225,7 +3286,7 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

uint32_t CodePoint = 0;

while (Count != NumHexDigits || Delimited) {

char C = getCharAndSize(CurPtr, CharSize);

- if (!Delimited && C == '{') {

+ if (!Delimited && Count == 0 && C == '{') {

Delimited = true;

CurPtr += CharSize;

continue;

@@ -3242,15 +3303,15 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

if (!Delimited)

break;

if (Diagnose)

- Diag(BufferPtr, diag::warn_delimited_ucn_incomplete)

+ Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)

<< StringRef(KindLoc, 1);

- return llvm::None;

+ return std::nullopt;

}

if (CodePoint & 0xF000'0000) {

if (Diagnose)

Diag(KindLoc, diag::err_escape_too_large) << 0;

- return llvm::None;

+ return std::nullopt;

}

CodePoint <<= 4;

@@ -3261,21 +3322,21 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

if (Count == 0) {

if (Diagnose)

- Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty

+ Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty

: diag::warn_ucn_escape_no_digits)

<< StringRef(KindLoc, 1);

- return llvm::None;

+ return std::nullopt;

}

if (Delimited && Kind == 'U') {

if (Diagnose)

- Diag(StartPtr, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);

- return llvm::None;

+ Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);

+ return std::nullopt;

}

if (!Delimited && Count != NumHexDigits) {

if (Diagnose) {

- Diag(BufferPtr, diag::warn_ucn_escape_incomplete);

+ Diag(SlashLoc, diag::warn_ucn_escape_incomplete);

// If the user wrote \U1234, suggest a fixit to \u.

if (Count == 4 && NumHexDigits == 8) {

CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);

@@ -3283,19 +3344,22 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

<< FixItHint::CreateReplacement(URange, "u");

}

- return llvm::None;

+ return std::nullopt;

}

if (Delimited && PP) {

- Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b

- ? diag::warn_cxx2b_delimited_escape_sequence

- : diag::ext_delimited_escape_sequence)

+ Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b

+ ? diag::warn_cxx2b_delimited_escape_sequence

+ : diag::ext_delimited_escape_sequence)

<< /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0);

}

if (Result) {

Result->setFlag(Token::HasUCN);

- if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0)))

+ // If the UCN contains either a trigraph or a line splicing,

+ // we need to call getAndAdvanceChar again to set the appropriate flags

+ // on Result.

+ if (CurPtr - StartPtr == (ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))

StartPtr = CurPtr;

else

while (StartPtr != CurPtr)

@@ -3306,8 +3370,9 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr,

return CodePoint;

}

-llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

- Token *Result) {

+std::optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

+ const char *SlashLoc,

+ Token *Result) {

unsigned CharSize;

bool Diagnose = Result && !isLexingRawMode();

@@ -3320,8 +3385,8 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

C = getCharAndSize(CurPtr, CharSize);

if (C != '{') {

if (Diagnose)

- Diag(StartPtr, diag::warn_ucn_escape_incomplete);

- return llvm::None;

+ Diag(SlashLoc, diag::warn_ucn_escape_incomplete);

+ return std::nullopt;

}

CurPtr += CharSize;

const char *StartName = CurPtr;

@@ -3335,28 +3400,29 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

break;

}

- if (!isAlphanumeric(C) && C != '_' && C != '-' && C != ' ')

+ if (isVerticalWhitespace(C))

break;

Buffer.push_back(C);

}

if (!FoundEndDelimiter || Buffer.empty()) {

if (Diagnose)

- Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty

+ Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty

: diag::warn_delimited_ucn_incomplete)

<< StringRef(KindLoc, 1);

- return llvm::None;

+ return std::nullopt;

}

StringRef Name(Buffer.data(), Buffer.size());

- llvm::Optional<char32_t> Res =

+ std::optional<char32_t> Match =

llvm::sys::unicode::nameToCodepointStrict(Name);

- llvm::Optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;

- if (!Res) {

- if (!isLexingRawMode()) {

- Diag(StartPtr, diag::err_invalid_ucn_name)

- << StringRef(Buffer.data(), Buffer.size());

- LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);

+ std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;

+ if (!Match) {

+ LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);

+ if (Diagnose) {

+ Diag(StartName, diag::err_invalid_ucn_name)

+ << StringRef(Buffer.data(), Buffer.size())

+ << makeCharRange(*this, StartName, CurPtr - CharSize);

if (LooseMatch) {

Diag(StartName, diag::note_invalid_ucn_name_loose_matching)

<< FixItHint::CreateReplacement(

@@ -3364,27 +3430,30 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

LooseMatch->Name);

}

- // When finding a match using Unicode loose matching rules

- // recover after having emitted a diagnostic.

- if (!LooseMatch)

- return llvm::None;

- // We do not offer missspelled character names suggestions here

+ // We do not offer misspelled character names suggestions here

// as the set of what would be a valid suggestion depends on context,

// and we should not make invalid suggestions.

}

- if (Diagnose && PP && !LooseMatch)

- Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b

- ? diag::warn_cxx2b_delimited_escape_sequence

- : diag::ext_delimited_escape_sequence)

+ if (Diagnose && Match)

+ Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b

+ ? diag::warn_cxx2b_delimited_escape_sequence

+ : diag::ext_delimited_escape_sequence)

<< /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0);

- if (LooseMatch)

- Res = LooseMatch->CodePoint;

+ // If no diagnostic has been emitted yet, likely because we are doing a

+ // tentative lexing, we do not want to recover here to make sure the token

+ // will not be incorrectly considered valid. This function will be called

+ // again and a diagnostic emitted then.

+ if (LooseMatch && Diagnose)

+ Match = LooseMatch->CodePoint;

if (Result) {

Result->setFlag(Token::HasUCN);

- if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 4))

+ // If the UCN contains either a trigraph or a line splicing,

+ // we need to call getAndAdvanceChar again to set the appropriate flags

+ // on Result.

+ if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 3))

StartPtr = CurPtr;

else

while (StartPtr != CurPtr)

@@ -3392,19 +3461,19 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,

} else {

StartPtr = CurPtr;

}

- return *Res;

+ return Match ? std::optional<uint32_t>(*Match) : std::nullopt;

}

uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,

Token *Result) {

unsigned CharSize;

- llvm::Optional<uint32_t> CodePointOpt;

+ std::optional<uint32_t> CodePointOpt;

char Kind = getCharAndSize(StartPtr, CharSize);

if (Kind == 'u' || Kind == 'U')

CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc, Result);

else if (Kind == 'N')

- CodePointOpt = tryReadNamedUCN(StartPtr, Result);

+ CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc, Result);

if (!CodePointOpt)

return 0;

@@ -3516,10 +3585,9 @@ bool Lexer::Lex(Token &Result) {

/// token, not a normal token, as such, it is an internal interface. It assumes

/// that the Flags of result have been cleared before calling this.

bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {

-LexNextToken:

- // New token, can't need cleaning yet.

- Result.clearFlag(Token::NeedsCleaning);

- Result.setIdentifierInfo(nullptr);

+LexStart:

+ assert(!Result.needsCleaning() && "Result needs cleaning");

+ assert(!Result.hasPtrData() && "Result has not been reset");

// CurPtr - Cache BufferPtr in an automatic variable.

const char *CurPtr = BufferPtr;

@@ -3591,7 +3659,7 @@ LexNextToken:

case '\r':

if (CurPtr[0] == '\n')

(void)getAndAdvanceChar(CurPtr, Result);

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case '\n':

// If we are inside a preprocessor directive and we see the end of line,

// we know we are done with the directive, so return an EOD token.

@@ -3788,7 +3856,7 @@ LexNextToken:

return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),

tok::wide_char_constant);

// FALL THROUGH, treating L like the start of an identifier.

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

// C99 6.4.2: Identifiers.

case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':

@@ -4301,6 +4369,10 @@ HandleDirective:

// We parsed the directive; lex a token with the new state.

return false;

+LexNextToken:

+ Result.clearFlag(Token::NeedsCleaning);

+ goto LexStart;

}

const char *Lexer::convertDependencyDirectiveToken(

@@ -4323,6 +4395,8 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {

while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {

if (DepDirectives.front().Kind == pp_eof)

return LexEndOfFile(Result, BufferEnd);

+ if (DepDirectives.front().Kind == tokens_present_before_eof)

+ MIOpt.ReadToken();

NextDepDirectiveTokenIndex = 0;

DepDirectives = DepDirectives.drop_front();

}

@@ -4334,6 +4408,22 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {

MIOpt.ReadToken();

}

+ if (ParsingFilename && DDTok.is(tok::less)) {

+ BufferPtr = BufferStart + DDTok.Offset;

+ LexAngledStringLiteral(Result, BufferPtr + 1);

+ if (Result.isNot(tok::header_name))

+ return true;

+ // Advance the index of lexed tokens.

+ while (true) {

+ const dependency_directives_scan::Token &NextTok =

+ DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];

+ if (BufferStart + NextTok.Offset >= BufferPtr)

+ break;

+ ++NextDepDirectiveTokenIndex;

+ }

+ return true;

+ }

const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result);

if (Result.is(tok::hash) && Result.isAtStartOfLine()) {

@@ -4398,6 +4488,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {

case cxx_import_decl:

case cxx_export_module_decl:

case cxx_export_import_decl:

+ case tokens_present_before_eof:

break;

case pp_if:

case pp_ifdef:

diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 53635a7385ec..421a85336043 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp

@@ -358,7 +358,7 @@ void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {

++I;

auto Delim = std::find(I, Input.end(), '}');

assert(Delim != Input.end());

- llvm::Optional<llvm::sys::unicode::LooseMatchingResult> Res =

+ std::optional<llvm::sys::unicode::LooseMatchingResult> Res =

llvm::sys::unicode::nameToCodepointLooseMatching(

StringRef(I, std::distance(I, Delim)));

assert(Res);

@@ -487,7 +487,7 @@ static void DiagnoseInvalidUnicodeCharacterName(

namespace u = llvm::sys::unicode;

- llvm::Optional<u::LooseMatchingResult> Res =

+ std::optional<u::LooseMatchingResult> Res =

u::nameToCodepointLooseMatching(Name);

if (Res) {

Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,

@@ -515,8 +515,9 @@ static void DiagnoseInvalidUnicodeCharacterName(

std::string Str;

llvm::UTF32 V = Match.Value;

- LLVM_ATTRIBUTE_UNUSED bool Converted =

+ bool Converted =

llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str);

+ (void)Converted;

assert(Converted && "Found a match wich is not a unicode character");

Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,

@@ -545,15 +546,13 @@ static bool ProcessNamedUCNEscape(const char *ThisTokBegin,

diag::err_delimited_escape_missing_brace)

<< StringRef(&ThisTokBuf[-1], 1);

}

- ThisTokBuf++;

return false;

}

ThisTokBuf++;

- const char *ClosingBrace =

- std::find_if_not(ThisTokBuf, ThisTokEnd, [](char C) {

- return llvm::isAlnum(C) || llvm::isSpace(C) || C == '_' || C == '-';

- });

- bool Incomplete = ClosingBrace == ThisTokEnd || *ClosingBrace != '}';

+ const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {

+ return C == '}' || isVerticalWhitespace(C);

+ });

+ bool Incomplete = ClosingBrace == ThisTokEnd;

bool Empty = ClosingBrace == ThisTokBuf;

if (Incomplete || Empty) {

if (Diags) {

@@ -567,8 +566,7 @@ static bool ProcessNamedUCNEscape(const char *ThisTokBegin,

}

StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);

ThisTokBuf = ClosingBrace + 1;

- llvm::Optional<char32_t> Res =

- llvm::sys::unicode::nameToCodepointStrict(Name);

+ std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name);

if (!Res) {

if (Diags)

DiagnoseInvalidUnicodeCharacterName(Diags, Features, Loc, ThisTokBegin,

@@ -766,13 +764,13 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,

switch (bytesToWrite) { // note: everything falls through.

case 4:

*--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case 3:

*--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case 2:

*--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case 1:

*--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);

}

@@ -945,9 +943,13 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,

// CUDA host and device may have different _Float16 support, therefore

// allows f16 literals to avoid false alarm.

+ // When we compile for OpenMP target offloading on NVPTX, f16 suffix

+ // should also be supported.

// ToDo: more precise check for CUDA.

- if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd &&

- s[1] == '1' && s[2] == '6') {

+ // TODO: AMDGPU might also support it in the future.

+ if ((Target.hasFloat16Type() || LangOpts.CUDA ||

+ (LangOpts.OpenMPIsDevice && Target.getTriple().isNVPTX())) &&

+ s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {

s += 2; // success, eat up 2 characters.

isFloat16 = true;

continue;

@@ -1037,7 +1039,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,

break;

}

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case 'j':

case 'J':

if (isImaginary) break; // Cannot be repeated.

diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index 7ede00b4aa64..c54f69bb9ead 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp

@@ -62,7 +62,7 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,

// Copy the actual unexpanded tokens to immediately after the result ptr.

if (!UnexpArgTokens.empty()) {

- static_assert(std::is_trivial<Token>::value,

+ static_assert(std::is_trivial_v<Token>,

"assume trivial copyability if copying into the "

"uninitialized array (as opposed to reusing a cached "

"MacroArgs)");

@@ -94,7 +94,7 @@ MacroArgs *MacroArgs::deallocate() {

// Run the dtor to deallocate the vectors.

this->~MacroArgs();

// Release the memory for the object.

- static_assert(std::is_trivially_destructible<Token>::value,

+ static_assert(std::is_trivially_destructible_v<Token>,

"assume trivially destructible and forego destructors");

free(this);

@@ -169,7 +169,7 @@ const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,

std::vector<Token> &Result = PreExpArgTokens[Arg];

if (!Result.empty()) return Result;

- SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);

+ SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);

const Token *AT = getUnexpArgument(Arg);

unsigned NumToks = getArgLength(AT)+1; // Include the EOF.

diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
index eae12beb6244..39bb0f44eff2 100644
--- a/clang/lib/Lex/MacroInfo.cpp
+++ b/clang/lib/Lex/MacroInfo.cpp

@@ -18,12 +18,12 @@

#include "clang/Basic/TokenKinds.h"

#include "clang/Lex/Preprocessor.h"

#include "clang/Lex/Token.h"

-#include "llvm/ADT/Optional.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/raw_ostream.h"

#include <cassert>

+#include <optional>

#include <utility>

using namespace clang;

@@ -34,11 +34,11 @@ namespace {

// and 4 byte SourceLocation.

template <int> class MacroInfoSizeChecker {

public:

- constexpr static bool AsExpected = true;

+ [[maybe_unused]] constexpr static bool AsExpected = true;

};

template <> class MacroInfoSizeChecker<8> {

public:

- constexpr static bool AsExpected =

+ [[maybe_unused]] constexpr static bool AsExpected =

sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2);

};

@@ -118,7 +118,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,

if (A.getKind() != B.getKind())

return false;

- // If this isn't the first first token, check that the whitespace and

+ // If this isn't the first token, check that the whitespace and

// start-of-line characteristics match.

if (i != 0 &&

(A.isAtStartOfLine() != B.isAtStartOfLine() ||

@@ -198,7 +198,7 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const {

MacroDirective::DefInfo MacroDirective::getDefinition() {

MacroDirective *MD = this;

SourceLocation UndefLoc;

- Optional<bool> isPublic;

+ std::optional<bool> isPublic;

for (; MD; MD = MD->getPrevious()) {

if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))

return DefInfo(DefMD, UndefLoc, !isPublic || *isPublic);

@@ -213,7 +213,7 @@ MacroDirective::DefInfo MacroDirective::getDefinition() {

isPublic = VisMD->isPublic();

}

- return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.value());

+ return DefInfo(nullptr, UndefLoc, !isPublic || *isPublic);

}

const MacroDirective::DefInfo

diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 47d6f5893e97..ee2cca4e0814 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp

@@ -28,7 +28,6 @@

#include "clang/Lex/LiteralSupport.h"

#include "clang/Lex/Token.h"

#include "llvm/ADT/DenseMap.h"

-#include "llvm/ADT/None.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallString.h"

@@ -47,6 +46,7 @@

#include <cassert>

#include <cstdint>

#include <cstring>

+#include <optional>

#include <string>

#include <system_error>

#include <utility>

@@ -75,7 +75,6 @@ void ModuleMap::addLinkAsDependency(Module *Mod) {

Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {

switch ((int)Role) {

- default: llvm_unreachable("unknown header role");

case NormalHeader:

return Module::HK_Normal;

case PrivateHeader:

@@ -84,7 +83,10 @@ Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) {

return Module::HK_Textual;

case PrivateHeader | TextualHeader:

return Module::HK_PrivateTextual;

+ case ExcludedHeader:

+ return Module::HK_Excluded;

}

+ llvm_unreachable("unknown header role");

}

ModuleMap::ModuleHeaderRole

@@ -99,11 +101,15 @@ ModuleMap::headerKindToRole(Module::HeaderKind Kind) {

case Module::HK_PrivateTextual:

return ModuleHeaderRole(PrivateHeader | TextualHeader);

case Module::HK_Excluded:

- llvm_unreachable("unexpected header kind");

+ return ExcludedHeader;

}

llvm_unreachable("unknown header kind");

}

+bool ModuleMap::isModular(ModuleHeaderRole Role) {

+ return !(Role & (ModuleMap::TextualHeader | ModuleMap::ExcludedHeader));

Module::ExportDecl

ModuleMap::resolveExport(Module *Mod,

const Module::UnresolvedExportDecl &Unresolved,

@@ -171,23 +177,23 @@ static void appendSubframeworkPaths(Module *Mod,

llvm::sys::path::append(Path, "Frameworks", Framework + ".framework");

}

-Optional<FileEntryRef> ModuleMap::findHeader(

+OptionalFileEntryRef ModuleMap::findHeader(

Module *M, const Module::UnresolvedHeaderDirective &Header,

SmallVectorImpl<char> &RelativePathName, bool &NeedsFramework) {

// Search for the header file within the module's home directory.

auto *Directory = M->Directory;

SmallString<128> FullPathName(Directory->getName());

- auto GetFile = [&](StringRef Filename) -> Optional<FileEntryRef> {

+ auto GetFile = [&](StringRef Filename) -> OptionalFileEntryRef {

auto File =

expectedToOptional(SourceMgr.getFileManager().getFileRef(Filename));

if (!File || (Header.Size && File->getSize() != *Header.Size) ||

(Header.ModTime && File->getModificationTime() != *Header.ModTime))

- return None;

+ return std::nullopt;

return *File;

};

- auto GetFrameworkFile = [&]() -> Optional<FileEntryRef> {

+ auto GetFrameworkFile = [&]() -> OptionalFileEntryRef {

unsigned FullPathLength = FullPathName.size();

appendSubframeworkPaths(M, RelativePathName);

unsigned RelativePathLength = RelativePathName.size();

@@ -241,7 +247,7 @@ Optional<FileEntryRef> ModuleMap::findHeader(

<< Header.FileName << M->getFullModuleName();

NeedsFramework = true;

}

- return None;

+ return std::nullopt;

}

return NormalHdrFile;

@@ -251,7 +257,7 @@ void ModuleMap::resolveHeader(Module *Mod,

const Module::UnresolvedHeaderDirective &Header,

bool &NeedsFramework) {

SmallString<128> RelativePathName;

- if (Optional<FileEntryRef> File =

+ if (OptionalFileEntryRef File =

findHeader(Mod, Header, RelativePathName, NeedsFramework)) {

if (Header.IsUmbrella) {

const DirectoryEntry *UmbrellaDir = &File->getDir().getDirEntry();

@@ -264,10 +270,7 @@ void ModuleMap::resolveHeader(Module *Mod,

} else {

Module::Header H = {Header.FileName, std::string(RelativePathName.str()),

*File};

- if (Header.Kind == Module::HK_Excluded)

- excludeHeader(Mod, H);

- else

- addHeader(Mod, H, headerKindToRole(Header.Kind));

+ addHeader(Mod, H, headerKindToRole(Header.Kind));

}

} else if (Header.HasBuiltinHeader && !Header.Size && !Header.ModTime) {

// There's a builtin header but no corresponding on-disk header. Assume

@@ -301,7 +304,7 @@ bool ModuleMap::resolveAsBuiltinHeader(

// supplied by Clang. Find that builtin header.

SmallString<128> Path;

llvm::sys::path::append(Path, BuiltinIncludeDir->getName(), Header.FileName);

- auto File = SourceMgr.getFileManager().getFile(Path);

+ auto File = SourceMgr.getFileManager().getOptionalFileRef(Path);

if (!File)

return false;

@@ -479,7 +482,7 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,

if (RequestingModule) {

resolveUses(RequestingModule, /*Complain=*/false);

- resolveHeaderDirectives(RequestingModule, /*File=*/llvm::None);

+ resolveHeaderDirectives(RequestingModule, /*File=*/std::nullopt);

}

bool Excluded = false;

@@ -489,6 +492,12 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,

HeadersMap::iterator Known = findKnownHeader(File);

if (Known != Headers.end()) {

for (const KnownHeader &Header : Known->second) {

+ // Excluded headers don't really belong to a module.

+ if (Header.getRole() == ModuleMap::ExcludedHeader) {

+ Excluded = true;

+ continue;

+ }

// Remember private headers for later printing of a diagnostic.

if (violatesPrivateInclude(RequestingModule, File, Header)) {

Private = Header.getModule();

@@ -562,12 +571,18 @@ static bool isBetterKnownHeader(const ModuleMap::KnownHeader &New,

(Old.getRole() & ModuleMap::TextualHeader))

return !(New.getRole() & ModuleMap::TextualHeader);

+ // Prefer a non-excluded header over an excluded header.

+ if ((New.getRole() == ModuleMap::ExcludedHeader) !=

+ (Old.getRole() == ModuleMap::ExcludedHeader))

+ return New.getRole() != ModuleMap::ExcludedHeader;

// Don't have a reason to choose between these. Just keep the first one.

return false;

}

ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File,

- bool AllowTextual) {

+ bool AllowTextual,

+ bool AllowExcluded) {

auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {

if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader)

return {};

@@ -579,6 +594,9 @@ ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File,

ModuleMap::KnownHeader Result;

// Iterate over all modules that 'File' is part of to find the best fit.

for (KnownHeader &H : Known->second) {

+ // Cannot use a module if the header is excluded in it.

+ if (!AllowExcluded && H.getRole() == ModuleMap::ExcludedHeader)

+ continue;

// Prefer a header from the source module over all others.

if (H.getModule()->getTopLevelModule() == SourceModule)

return MakeResult(H);

@@ -607,7 +625,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) {

UmbrellaModule = UmbrellaModule->Parent;

if (UmbrellaModule->InferSubmodules) {

- const FileEntry *UmbrellaModuleMap =

+ OptionalFileEntryRefDegradesToFileEntryPtr UmbrellaModuleMap =

getModuleMapFileForUniquing(UmbrellaModule);

// Infer submodules for each of the directories we found between

@@ -672,7 +690,7 @@ ModuleMap::findAllModulesForHeader(const FileEntry *File) {

if (findOrCreateModuleForHeaderInUmbrellaDir(File))

return Headers.find(File)->second;

- return None;

+ return std::nullopt;

}

ArrayRef<ModuleMap::KnownHeader>

@@ -681,7 +699,7 @@ ModuleMap::findResolvedModulesForHeader(const FileEntry *File) const {

resolveHeaderDirectives(File);

auto It = Headers.find(File);

if (It == Headers.end())

- return None;

+ return std::nullopt;

return It->second;

}

@@ -700,6 +718,9 @@ ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header,

E = Known->second.end();

I != E; ++I) {

+ if (I->getRole() == ModuleMap::ExcludedHeader)

+ continue;

if (I->isAvailable() &&

(!RequestingModule ||

I->getModule()->isSubModuleOf(RequestingModule))) {

@@ -852,8 +873,7 @@ ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent,

}

Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc,

- StringRef Name,

- Module *GlobalModule) {

+ StringRef Name) {

assert(LangOpts.CurrentModule == Name && "module name mismatch");

assert(!Modules[Name] && "redefining existing module");

@@ -879,29 +899,6 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc,

return Result;

}

-Module *ModuleMap::createHeaderModule(StringRef Name,

- ArrayRef<Module::Header> Headers) {

- assert(LangOpts.CurrentModule == Name && "module name mismatch");

- assert(!Modules[Name] && "redefining existing module");

- auto *Result =

- new Module(Name, SourceLocation(), nullptr, /*IsFramework*/ false,

- /*IsExplicit*/ false, NumCreatedModules++);

- Result->Kind = Module::ModuleInterfaceUnit;

- Modules[Name] = SourceModule = Result;

- for (const Module::Header &H : Headers) {

- auto *M = new Module(H.NameAsWritten, SourceLocation(), Result,

- /*IsFramework*/ false,

- /*IsExplicit*/ true, NumCreatedModules++);

- // Header modules are implicitly 'export *'.

- M->Exports.push_back(Module::ExportDecl(nullptr, true));

- addHeader(M, H, NormalHeader);

- }

- return Result;

Module *ModuleMap::createHeaderUnit(SourceLocation Loc, StringRef Name,

Module::Header H) {

assert(LangOpts.CurrentModule == Name && "module name mismatch");

@@ -1018,14 +1015,16 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,

// If we're not allowed to infer a framework module, don't.

if (!canInfer)

return nullptr;

- } else

- ModuleMapFile = getModuleMapFileForUniquing(Parent);

+ } else {

+ OptionalFileEntryRefDegradesToFileEntryPtr ModuleMapRef =

+ getModuleMapFileForUniquing(Parent);

+ ModuleMapFile = ModuleMapRef;

+ }

// Look for an umbrella header.

SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName());

llvm::sys::path::append(UmbrellaName, "Headers", ModuleName + ".h");

- auto UmbrellaHeader = FileMgr.getFile(UmbrellaName);

+ auto UmbrellaHeader = FileMgr.getOptionalFileRef(UmbrellaName);

// FIXME: If there's no umbrella header, we could probably scan the

// framework to load *everything*. But, it's not clear that this is a good

@@ -1137,14 +1136,14 @@ Module *ModuleMap::createShadowedModule(StringRef Name, bool IsFramework,

}

void ModuleMap::setUmbrellaHeader(

- Module *Mod, const FileEntry *UmbrellaHeader, const Twine &NameAsWritten,

+ Module *Mod, FileEntryRef UmbrellaHeader, const Twine &NameAsWritten,

const Twine &PathRelativeToRootModuleDirectory) {

Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader));

- Mod->Umbrella = UmbrellaHeader;

+ Mod->Umbrella = &UmbrellaHeader.getMapEntry();

Mod->UmbrellaAsWritten = NameAsWritten.str();

Mod->UmbrellaRelativeToRootModuleDirectory =

PathRelativeToRootModuleDirectory.str();

- UmbrellaDirs[UmbrellaHeader->getDir()] = Mod;

+ UmbrellaDirs[UmbrellaHeader.getDir()] = Mod;

// Notify callbacks that we just added a new header.

for (const auto &Cb : Callbacks)

@@ -1214,11 +1213,11 @@ void ModuleMap::resolveHeaderDirectives(const FileEntry *File) const {

}

void ModuleMap::resolveHeaderDirectives(

- Module *Mod, llvm::Optional<const FileEntry *> File) const {

+ Module *Mod, std::optional<const FileEntry *> File) const {

bool NeedsFramework = false;

SmallVector<Module::UnresolvedHeaderDirective, 1> NewHeaders;

- const auto Size = File ? File.value()->getSize() : 0;

- const auto ModTime = File ? File.value()->getModificationTime() : 0;

+ const auto Size = File ? (*File)->getSize() : 0;

+ const auto ModTime = File ? (*File)->getModificationTime() : 0;

for (auto &Header : Mod->UnresolvedHeaders) {

if (File && ((Header.ModTime && Header.ModTime != ModTime) ||

@@ -1260,29 +1259,21 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,

Cb->moduleMapAddHeader(Header.Entry->getName());

}

-void ModuleMap::excludeHeader(Module *Mod, Module::Header Header) {

- // Add this as a known header so we won't implicitly add it to any

- // umbrella directory module.

- // FIXME: Should we only exclude it from umbrella modules within the

- // specified module?

- (void) Headers[Header.Entry];

- Mod->Headers[Module::HK_Excluded].push_back(std::move(Header));

-const FileEntry *

+OptionalFileEntryRef

ModuleMap::getContainingModuleMapFile(const Module *Module) const {

if (Module->DefinitionLoc.isInvalid())

- return nullptr;

+ return std::nullopt;

- return SourceMgr.getFileEntryForID(

- SourceMgr.getFileID(Module->DefinitionLoc));

+ return SourceMgr.getFileEntryRefForID(

+ SourceMgr.getFileID(Module->DefinitionLoc));

}

-const FileEntry *ModuleMap::getModuleMapFileForUniquing(const Module *M) const {

+OptionalFileEntryRef

+ModuleMap::getModuleMapFileForUniquing(const Module *M) const {

if (M->IsInferred) {

assert(InferredModuleAllowedBy.count(M) && "missing inferred module map");

- return InferredModuleAllowedBy.find(M)->second;

+ // FIXME: Update InferredModuleAllowedBy to use FileEntryRef.

+ return InferredModuleAllowedBy.find(M)->second->getLastRef();

}

return getContainingModuleMapFile(M);

}

@@ -1292,6 +1283,49 @@ void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {

InferredModuleAllowedBy[M] = ModMap;

}

+std::error_code

+ModuleMap::canonicalizeModuleMapPath(SmallVectorImpl<char> &Path) {

+ StringRef Dir = llvm::sys::path::parent_path({Path.data(), Path.size()});

+ // Do not canonicalize within the framework; the module map parser expects

+ // Modules/ not Versions/A/Modules.

+ if (llvm::sys::path::filename(Dir) == "Modules") {

+ StringRef Parent = llvm::sys::path::parent_path(Dir);

+ if (Parent.endswith(".framework"))

+ Dir = Parent;

+ }

+ FileManager &FM = SourceMgr.getFileManager();

+ auto DirEntry = FM.getDirectory(Dir.empty() ? "." : Dir);

+ if (!DirEntry)

+ return DirEntry.getError();

+ // Canonicalize the directory.

+ StringRef CanonicalDir = FM.getCanonicalName(*DirEntry);

+ if (CanonicalDir != Dir) {

+ auto CanonicalDirEntry = FM.getDirectory(CanonicalDir);

+ // Only use the canonicalized path if it resolves to the same entry as the

+ // original. This is not true if there's a VFS overlay on top of a FS where

+ // the directory is a symlink. The overlay would not remap the target path

+ // of the symlink to the same directory entry in that case.

+ if (CanonicalDirEntry && *CanonicalDirEntry == *DirEntry) {

+ bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir);

+ (void)Done;

+ assert(Done && "Path should always start with Dir");

+ }

+ // In theory, the filename component should also be canonicalized if it is

+ // on a case-insensitive filesystem. However, the extra canonicalization is

+ // expensive and if clang looked up the filename it will always be lowercase.

+ // Remove ., remove redundant separators, and switch to native separators.

+ // This is needed for separators between CanonicalDir and the filename.

+ llvm::sys::path::remove_dots(Path);

+ return std::error_code();

void ModuleMap::addAdditionalModuleMapFile(const Module *M,

const FileEntry *ModuleMap) {

AdditionalModMaps[M].insert(ModuleMap);

@@ -1668,7 +1702,7 @@ retry:

break;

}

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

default:

Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token);

@@ -2026,8 +2060,7 @@ void ModuleMapParser::parseModuleDecl() {

ActiveModule->IsSystem = true;

if (Attrs.IsExternC)

ActiveModule->IsExternC = true;

- if (Attrs.NoUndeclaredIncludes ||

- (!ActiveModule->Parent && ModuleName == "Darwin"))

+ if (Attrs.NoUndeclaredIncludes)

ActiveModule->NoUndeclaredIncludes = true;

ActiveModule->Directory = Directory;

@@ -2300,6 +2333,7 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,

SourceLocation LeadingLoc) {

// We've already consumed the first token.

ModuleMap::ModuleHeaderRole Role = ModuleMap::NormalHeader;

if (LeadingToken == MMToken::PrivateKeyword) {

Role = ModuleMap::PrivateHeader;

// 'private' may optionally be followed by 'textual'.

@@ -2307,6 +2341,8 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,

LeadingToken = Tok.Kind;

consumeToken();

}

+ } else if (LeadingToken == MMToken::ExcludeKeyword) {

+ Role = ModuleMap::ExcludedHeader;

}

if (LeadingToken == MMToken::TextualKeyword)

@@ -2340,9 +2376,7 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken,

Header.FileName = std::string(Tok.getString());

Header.FileNameLoc = consumeToken();

Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword;

- Header.Kind =

- (LeadingToken == MMToken::ExcludeKeyword ? Module::HK_Excluded

- : Map.headerRoleToKind(Role));

+ Header.Kind = Map.headerRoleToKind(Role);

// Check whether we already have an umbrella.

if (Header.IsUmbrella && ActiveModule->Umbrella) {

@@ -2476,8 +2510,8 @@ void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) {

SourceMgr.getFileManager().getVirtualFileSystem();

for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E;

I != E && !EC; I.increment(EC)) {

- if (auto FE = SourceMgr.getFileManager().getFile(I->path())) {

- Module::Header Header = {"", std::string(I->path()), *FE};

+ if (auto FE = SourceMgr.getFileManager().getOptionalFileRef(I->path())) {

+ Module::Header Header = {"", std::string(I->path()), FE};

Headers.push_back(std::move(Header));

}

@@ -3033,7 +3067,7 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem,

}

assert(Target && "Missing target information");

- llvm::Optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getBufferOrNone(ID);

+ std::optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getBufferOrNone(ID);

if (!Buffer)

return ParsedModuleMap[File] = true;

assert((!Offset || *Offset <= Buffer->getBufferSize()) &&

diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
index b618071590ba..f2b60a728e90 100644
--- a/clang/lib/Lex/PPCallbacks.cpp
+++ b/clang/lib/Lex/PPCallbacks.cpp

@@ -15,16 +15,15 @@ using namespace clang;

PPCallbacks::~PPCallbacks() = default;

void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,

- bool IsAngled, Optional<FileEntryRef> File,

+ bool IsAngled, OptionalFileEntryRef File,

SrcMgr::CharacteristicKind FileType) {}

// Out of line key method.

PPChainedCallbacks::~PPChainedCallbacks() = default;

void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName,

- bool IsAngled, Optional<FileEntryRef> File,

+ bool IsAngled, OptionalFileEntryRef File,

SrcMgr::CharacteristicKind FileType) {

First->HasInclude(Loc, FileName, IsAngled, File, FileType);

Second->HasInclude(Loc, FileName, IsAngled, File, FileType);

}

diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 9a8fd4391b41..6ae513dea878 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp

@@ -47,6 +47,7 @@

#include <cassert>

#include <cstring>

#include <new>

+#include <optional>

#include <string>

#include <utility>

@@ -57,9 +58,8 @@ using namespace clang;

//===----------------------------------------------------------------------===//

MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {

- auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead};

- MIChainHead = MIChain;

- return &MIChain->MI;

+ static_assert(std::is_trivially_destructible_v<MacroInfo>, "");

+ return new (BP) MacroInfo(L);

}

DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,

@@ -109,25 +109,6 @@ enum PPElifDiag {

PED_Elifndef

};

-// The -fmodule-name option tells the compiler to textually include headers in

-// the specified module, meaning clang won't build the specified module. This is

-// useful in a number of situations, for instance, when building a library that

-// vends a module map, one might want to avoid hitting intermediate build

-// products containimg the module map or avoid finding the system installed

-// modulemap for that library.

-static bool isForModuleBuilding(Module *M, StringRef CurrentModule,

- StringRef ModuleName) {

- StringRef TopLevelName = M->getTopLevelModuleName();

- // When building framework Foo, we wanna make sure that Foo *and* Foo_Private

- // are textually included and no modules are built for both.

- if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName &&

- !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))

- TopLevelName = TopLevelName.drop_back(8);

- return TopLevelName == CurrentModule;

static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {

const LangOptions &Lang = PP.getLangOpts();

if (isReservedInAllContexts(II->isReserved(Lang))) {

@@ -274,9 +255,9 @@ static bool warnByDefaultOnWrongCase(StringRef Include) {

/// \param Candidates the candidates to find a similar string.

///

/// \returns a similar string if exists. If no similar string exists,

-/// returns None.

-static Optional<StringRef> findSimilarStr(

- StringRef LHS, const std::vector<StringRef> &Candidates) {

+/// returns std::nullopt.

+static std::optional<StringRef>

+findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {

// We need to check if `Candidates` has the exact case-insensitive string

// because the Levenshtein distance match does not care about it.

for (StringRef C : Candidates) {

@@ -291,7 +272,7 @@ static Optional<StringRef> findSimilarStr(

size_t Length = LHS.size();

size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;

- Optional<std::pair<StringRef, size_t>> SimilarStr = None;

+ std::optional<std::pair<StringRef, size_t>> SimilarStr;

for (StringRef C : Candidates) {

size_t CurDist = LHS.edit_distance(C, true);

if (CurDist <= MaxDist) {

@@ -308,7 +289,7 @@ static Optional<StringRef> findSimilarStr(

if (SimilarStr) {

return SimilarStr->first;

} else {

- return None;

+ return std::nullopt;

}

@@ -456,7 +437,7 @@ void Preprocessor::SuggestTypoedDirective(const Token &Tok,

if (LangOpts.C2x || LangOpts.CPlusPlus2b)

Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});

- if (Optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {

+ if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {

// Directive cannot be coming from macro.

assert(Tok.getLocation().isFileID());

CharSourceRange DirectiveRange = CharSourceRange::getCharRange(

@@ -492,8 +473,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,

// lookup pointer.

assert(!SkippingExcludedConditionalBlock &&

"calling SkipExcludedConditionalBlock recursively");

- llvm::SaveAndRestore<bool> SARSkipping(SkippingExcludedConditionalBlock,

- true);

+ llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);

++NumSkipped;

assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");

@@ -856,7 +836,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,

Tok.getLocation());

}

-Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {

+Module *Preprocessor::getModuleForLocation(SourceLocation Loc,

+ bool AllowTextual) {

if (!SourceMgr.isInMainFile(Loc)) {

// Try to determine the module of the include directive.

// FIXME: Look into directly passing the FileEntry from LookupFile instead.

@@ -864,7 +845,7 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {

if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {

// The include comes from an included file.

return HeaderInfo.getModuleMap()

- .findModuleForHeader(EntryOfIncl)

+ .findModuleForHeader(EntryOfIncl, AllowTextual)

.getModule();

}

@@ -879,7 +860,8 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {

const FileEntry *

Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,

SourceLocation Loc) {

- Module *IncM = getModuleForLocation(IncLoc);

+ Module *IncM = getModuleForLocation(

+ IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

// Walk up through the include stack, looking through textual headers of M

// until we hit a non-textual header that we can #include. (We assume textual

@@ -908,6 +890,10 @@ Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,

continue;

}

+ // Don't suggest explicitly excluded headers.

+ if (Header.getRole() == ModuleMap::ExcludedHeader)

+ continue;

// We'll suggest including textual headers below if they're

// include-guarded.

if (Header.getRole() & ModuleMap::TextualHeader)

@@ -943,17 +929,18 @@ Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,

return nullptr;

}

-Optional<FileEntryRef> Preprocessor::LookupFile(

+OptionalFileEntryRef Preprocessor::LookupFile(

SourceLocation FilenameLoc, StringRef Filename, bool isAngled,

ConstSearchDirIterator FromDir, const FileEntry *FromFile,

ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,

SmallVectorImpl<char> *RelativePath,

ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,

- bool *IsFrameworkFound, bool SkipCache) {

+ bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {

ConstSearchDirIterator CurDirLocal = nullptr;

ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;

- Module *RequestingModule = getModuleForLocation(FilenameLoc);

+ Module *RequestingModule = getModuleForLocation(

+ FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);

// If the header lookup mechanism may be relative to the current inclusion

@@ -1007,7 +994,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile(

// the include path until we find that file or run out of files.

ConstSearchDirIterator TmpCurDir = CurDir;

ConstSearchDirIterator TmpFromDir = nullptr;

- while (Optional<FileEntryRef> FE = HeaderInfo.LookupFile(

+ while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(

Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,

Includers, SearchPath, RelativePath, RequestingModule,

SuggestedModule, /*IsMapped=*/nullptr,

@@ -1025,10 +1012,10 @@ Optional<FileEntryRef> Preprocessor::LookupFile(

}

// Do a standard file entry lookup.

- Optional<FileEntryRef> FE = HeaderInfo.LookupFile(

+ OptionalFileEntryRef FE = HeaderInfo.LookupFile(

Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,

RelativePath, RequestingModule, SuggestedModule, IsMapped,

- IsFrameworkFound, SkipCache, BuildSystemModule);

+ IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);

if (FE) {

if (SuggestedModule && !LangOpts.AsmPreprocessor)

HeaderInfo.getModuleMap().diagnoseHeaderInclusion(

@@ -1043,7 +1030,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile(

// headers on the #include stack and pass them to HeaderInfo.

if (IsFileLexer()) {

if ((CurFileEnt = CurPPLexer->getFileEntry())) {

- if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(

+ if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(

Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule,

SuggestedModule)) {

if (SuggestedModule && !LangOpts.AsmPreprocessor)

@@ -1058,7 +1045,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile(

for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {

if (IsFileLexer(ISEntry)) {

if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {

- if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(

+ if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(

Filename, CurFileEnt, SearchPath, RelativePath,

RequestingModule, SuggestedModule)) {

if (SuggestedModule && !LangOpts.AsmPreprocessor)

@@ -1072,7 +1059,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile(

}

// Otherwise, we really couldn't find the file.

- return None;

+ return std::nullopt;

}

//===----------------------------------------------------------------------===//

@@ -1998,7 +1985,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

}

-Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(

+OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(

ConstSearchDirIterator *CurDir, StringRef &Filename,

SourceLocation FilenameLoc, CharSourceRange FilenameRange,

const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,

@@ -2006,24 +1993,26 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(

const FileEntry *LookupFromFile, StringRef &LookupFilename,

SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,

ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {

- Optional<FileEntryRef> File = LookupFile(

- FilenameLoc, LookupFilename,

- isAngled, LookupFrom, LookupFromFile, CurDir,

+ OptionalFileEntryRef File = LookupFile(

+ FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,

Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,

&SuggestedModule, &IsMapped, &IsFrameworkFound);

if (File)

return File;

+ // Give the clients a chance to silently skip this include.

+ if (Callbacks && Callbacks->FileNotFound(Filename))

+ return std::nullopt;

if (SuppressIncludeNotFoundError)

- return None;

+ return std::nullopt;

// If the file could not be located and it was included via angle

// brackets, we can attempt a lookup as though it were a quoted path to

// provide the user with a possible fixit.

if (isAngled) {

- Optional<FileEntryRef> File = LookupFile(

- FilenameLoc, LookupFilename,

- false, LookupFrom, LookupFromFile, CurDir,

+ OptionalFileEntryRef File = LookupFile(

+ FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,

Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,

&SuggestedModule, &IsMapped,

/*IsFrameworkFound=*/nullptr);

@@ -2052,9 +2041,9 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(

StringRef TypoCorrectionName = CorrectTypoFilename(Filename);

StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);

- Optional<FileEntryRef> File = LookupFile(

- FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom, LookupFromFile,

- CurDir, Callbacks ? &SearchPath : nullptr,

+ OptionalFileEntryRef File = LookupFile(

+ FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,

+ LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,

Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,

/*IsFrameworkFound=*/nullptr);

if (File) {

@@ -2090,7 +2079,7 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(

<< CacheEntry.Directory->getName();

}

- return None;

+ return std::nullopt;

}

/// Handle either a #include-like directive or an import declaration that names

@@ -2177,7 +2166,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

BackslashStyle = llvm::sys::path::Style::windows;

}

- Optional<FileEntryRef> File = LookupHeaderIncludeOrImport(

+ OptionalFileEntryRef File = LookupHeaderIncludeOrImport(

&CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,

IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,

LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);

@@ -2215,14 +2204,13 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

alreadyIncluded(*File))

Action = IncludeLimitReached;

- bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&

- !isForModuleBuilding(SuggestedModule.getModule(),

- getLangOpts().CurrentModule,

- getLangOpts().ModuleName);

// FIXME: We do not have a good way to disambiguate C++ clang modules from

// C++ standard modules (other than use/non-use of Header Units).

Module *SM = SuggestedModule.getModule();

+ bool MaybeTranslateInclude =

+ Action == Enter && File && SM && !SM->isForBuilding(getLangOpts());

// Maybe a usable Header Unit

bool UsableHeaderUnit = false;

if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {

@@ -2235,14 +2223,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

}

// Maybe a usable clang header module.

- bool UsableHeaderModule =

+ bool UsableClangHeaderModule =

(getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&

!SM->isHeaderUnit();

// Determine whether we should try to import the module for this #include, if

// there is one. Don't do so if precompiled module support is disabled or we

// are processing this module textually (because we're building the module).

- if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {

+ if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {

// If this include corresponds to a module but that module is

// unavailable, diagnose the situation and bail out.

// FIXME: Remove this; loadModule does the same check (but produces

@@ -2281,11 +2269,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

if (Imported) {

Action = Import;

} else if (Imported.isMissingExpected()) {

+ markClangModuleAsAffecting(

+ static_cast<Module *>(Imported)->getTopLevelModule());

// We failed to find a submodule that we assumed would exist (because it

// was in the directory of an umbrella header, for instance), but no

// actual module containing it exists (because the umbrella header is

// incomplete). Treat this as a textual inclusion.

SuggestedModule = ModuleMap::KnownHeader();

+ SM = nullptr;

} else if (Imported.isConfigMismatch()) {

// On a configuration mismatch, enter the header textually. We still know

// that it's part of the corresponding module.

@@ -2549,9 +2540,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(

// that behaves the same as the header would behave in a compilation using

// that PCH, which means we should enter the submodule. We need to teach

// the AST serialization layer to deal with the resulting AST.

- if (getLangOpts().CompilingPCH &&

- isForModuleBuilding(SM, getLangOpts().CurrentModule,

- getLangOpts().ModuleName))

+ if (getLangOpts().CompilingPCH && SM->isForBuilding(getLangOpts()))

return {ImportAction::None};

assert(!CurLexerSubmodule && "should not have marked this as a module yet");

diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index bd35689f18e7..aa411cfc5f2c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp

@@ -869,7 +869,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,

/// to "!defined(X)" return X in IfNDefMacro.

Preprocessor::DirectiveEvalResult

Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {

- SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);

+ SaveAndRestore PPDir(ParsingIfOrElifDirective, true);

// Save the current state of 'DisableMacroExpansion' and reset it to false. If

// 'DisableMacroExpansion' is true, then we must be in a macro argument list

// in which case a directive is undefined behavior. We want macros to be able

diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index 36d3aa59bb2f..66168467ecf5 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp

@@ -22,6 +22,7 @@

#include "llvm/Support/FileSystem.h"

#include "llvm/Support/MemoryBufferRef.h"

#include "llvm/Support/Path.h"

+#include <optional>

using namespace clang;

@@ -75,7 +76,7 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir,

MaxIncludeStackDepth = IncludeMacroStack.size();

// Get the MemoryBuffer for this FID, if it fails, we fail.

- llvm::Optional<llvm::MemoryBufferRef> InputFile =

+ std::optional<llvm::MemoryBufferRef> InputFile =

getSourceManager().getBufferOrNone(FID, Loc);

if (!InputFile) {

SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);

@@ -94,8 +95,8 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir,

Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile);

if (getPreprocessorOpts().DependencyDirectivesForFile &&

FID != PredefinesFileID) {

- if (Optional<FileEntryRef> File = SourceMgr.getFileEntryRefForID(FID)) {

- if (Optional<ArrayRef<dependency_directives_scan::Directive>>

+ if (OptionalFileEntryRef File = SourceMgr.getFileEntryRefForID(FID)) {

+ if (std::optional<ArrayRef<dependency_directives_scan::Directive>>

DepDirectives =

getPreprocessorOpts().DependencyDirectivesForFile(*File)) {

TheLexer->DepDirectives = *DepDirectives;

diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index f3be2107f985..bbc271e5611e 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp

@@ -37,8 +37,6 @@

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/DenseSet.h"

#include "llvm/ADT/FoldingSet.h"

-#include "llvm/ADT/None.h"

-#include "llvm/ADT/Optional.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/SmallVector.h"

@@ -54,6 +52,7 @@

#include <cstddef>

#include <cstring>

#include <ctime>

+#include <optional>

#include <string>

#include <tuple>

#include <utility>

@@ -285,7 +284,8 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {

// Dump module macros.

llvm::DenseSet<ModuleMacro*> Active;

- for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None)

+ for (auto *MM :

+ State ? State->getActiveModuleMacros(*this, II) : std::nullopt)

Active.insert(MM);

llvm::DenseSet<ModuleMacro*> Visited;

llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf.begin(), Leaf.end());

@@ -371,6 +371,8 @@ void Preprocessor::RegisterBuiltinMacros() {

Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature");

Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension");

Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin");

+ Ident__has_constexpr_builtin =

+ RegisterBuiltinMacro(*this, "__has_constexpr_builtin");

Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute");

if (!getLangOpts().CPlusPlus)

Ident__has_c_attribute = RegisterBuiltinMacro(*this, "__has_c_attribute");

@@ -387,6 +389,10 @@ void Preprocessor::RegisterBuiltinMacros() {

Ident__is_target_os = RegisterBuiltinMacro(*this, "__is_target_os");

Ident__is_target_environment =

RegisterBuiltinMacro(*this, "__is_target_environment");

+ Ident__is_target_variant_os =

+ RegisterBuiltinMacro(*this, "__is_target_variant_os");

+ Ident__is_target_variant_environment =

+ RegisterBuiltinMacro(*this, "__is_target_variant_environment");

// Modules.

Ident__building_module = RegisterBuiltinMacro(*this, "__building_module");

@@ -1081,8 +1087,15 @@ void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() {

/// the identifier tokens inserted.

static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,

Preprocessor &PP) {

- time_t TT = time(nullptr);

- struct tm *TM = localtime(&TT);

+ time_t TT;

+ std::tm *TM;

+ if (PP.getPreprocessorOpts().SourceDateEpoch) {

+ TT = *PP.getPreprocessorOpts().SourceDateEpoch;

+ TM = std::gmtime(&TT);

+ } else {

+ TT = std::time(nullptr);

+ TM = std::localtime(&TT);

+ }

static const char * const Months[] = {

"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"

@@ -1091,8 +1104,11 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,

{

SmallString<32> TmpBuffer;

llvm::raw_svector_ostream TmpStream(TmpBuffer);

- TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon],

- TM->tm_mday, TM->tm_year + 1900);

+ if (TM)

+ TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon],

+ TM->tm_mday, TM->tm_year + 1900);

+ else

+ TmpStream << "??? ?? ????";

Token TmpTok;

TmpTok.startToken();

PP.CreateString(TmpStream.str(), TmpTok);

@@ -1102,8 +1118,11 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,

{

SmallString<32> TmpBuffer;

llvm::raw_svector_ostream TmpStream(TmpBuffer);

- TmpStream << llvm::format("\"%02d:%02d:%02d\"",

- TM->tm_hour, TM->tm_min, TM->tm_sec);

+ if (TM)

+ TmpStream << llvm::format("\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min,

+ TM->tm_sec);

+ else

+ TmpStream << "??:??:??";

Token TmpTok;

TmpTok.startToken();

PP.CreateString(TmpStream.str(), TmpTok);

@@ -1230,7 +1249,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,

return false;

// Search include directories.

- Optional<FileEntryRef> File =

+ OptionalFileEntryRef File =

PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile,

nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);

@@ -1282,7 +1301,7 @@ static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,

unsigned ParenDepth = 1;

SourceLocation LParenLoc = Tok.getLocation();

- llvm::Optional<int> Result;

+ std::optional<int> Result;

Token ResultTok;

bool SuppressDiagnostic = false;

@@ -1326,10 +1345,10 @@ already_lexed:

// The last ')' has been reached; return the value if one found or

// a diagnostic and a dummy value.

if (Result) {

- OS << Result.value();

+ OS << *Result;

// For strict conformance to __has_cpp_attribute rules, use 'L'

// suffix for dated literals.

- if (Result.value() > 1)

+ if (*Result > 1)

OS << 'L';

} else {

OS << 0;

@@ -1428,9 +1447,47 @@ static bool isTargetEnvironment(const TargetInfo &TI,

const IdentifierInfo *II) {

std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str();

llvm::Triple Env(EnvName);

+ // The unknown environment is matched only if

+ // '__is_target_environment(unknown)' is used.

+ if (Env.getEnvironment() == llvm::Triple::UnknownEnvironment &&

+ EnvName != "---unknown")

+ return false;

return TI.getTriple().getEnvironment() == Env.getEnvironment();

}

+/// Implements the __is_target_variant_os builtin macro.

+static bool isTargetVariantOS(const TargetInfo &TI, const IdentifierInfo *II) {

+ if (TI.getTriple().isOSDarwin()) {

+ const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple();

+ if (!VariantTriple)

+ return false;

+ std::string OSName =

+ (llvm::Twine("unknown-unknown-") + II->getName().lower()).str();

+ llvm::Triple OS(OSName);

+ if (OS.getOS() == llvm::Triple::Darwin) {

+ // Darwin matches macos, ios, etc.

+ return VariantTriple->isOSDarwin();

+ }

+ return VariantTriple->getOS() == OS.getOS();

+ }

+ return false;

+/// Implements the __is_target_variant_environment builtin macro.

+static bool isTargetVariantEnvironment(const TargetInfo &TI,

+ const IdentifierInfo *II) {

+ if (TI.getTriple().isOSDarwin()) {

+ const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple();

+ if (!VariantTriple)

+ return false;

+ std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str();

+ llvm::Triple Env(EnvName);

+ return VariantTriple->getEnvironment() == Env.getEnvironment();

+ }

+ return false;

/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded

/// as a builtin macro, handle it and return the next token as 'Tok'.

void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

@@ -1556,22 +1613,24 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

Diag(Tok.getLocation(), diag::warn_pp_date_time);

// MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be

// of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.

- // Get the file that we are lexing out of. If we're currently lexing from

- // a macro, dig into the include stack.

- const FileEntry *CurFile = nullptr;

- PreprocessorLexer *TheLexer = getCurrentFileLexer();

- if (TheLexer)

- CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());

const char *Result;

- if (CurFile) {

- time_t TT = CurFile->getModificationTime();

- struct tm *TM = localtime(&TT);

+ if (getPreprocessorOpts().SourceDateEpoch) {

+ time_t TT = *getPreprocessorOpts().SourceDateEpoch;

+ std::tm *TM = std::gmtime(&TT);

Result = asctime(TM);

} else {

- Result = "??? ??? ?? ??:??:?? ????\n";

+ // Get the file that we are lexing out of. If we're currently lexing from

+ // a macro, dig into the include stack.

+ const FileEntry *CurFile = nullptr;

+ if (PreprocessorLexer *TheLexer = getCurrentFileLexer())

+ CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());

+ if (CurFile) {

+ time_t TT = CurFile->getModificationTime();

+ struct tm *TM = localtime(&TT);

+ Result = asctime(TM);

+ } else {

+ Result = "??? ??? ?? ??:??:?? ????\n";

+ }

}

// Surround the string with " and strip the trailing newline.

OS << '"' << StringRef(Result).drop_back() << '"';

@@ -1663,7 +1722,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

.Case("__array_rank", true)

.Case("__array_extent", true)

.Case("__reference_binds_to_temporary", true)

- .Case("__underlying_type", true)

+#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) .Case("__" #Trait, true)

+#include "clang/Basic/TransformTypeTraits.def"

.Default(false);

} else {

return llvm::StringSwitch<bool>(II->getName())

@@ -1677,9 +1737,23 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

.Case("__is_target_vendor", true)

.Case("__is_target_os", true)

.Case("__is_target_environment", true)

+ .Case("__is_target_variant_os", true)

+ .Case("__is_target_variant_environment", true)

.Default(false);

}

});

+ } else if (II == Ident__has_constexpr_builtin) {

+ EvaluateFeatureLikeBuiltinMacro(

+ OS, Tok, II, *this, false,

+ [this](Token &Tok, bool &HasLexedNextToken) -> int {

+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(

+ Tok, *this, diag::err_feature_check_malformed);

+ if (!II)

+ return false;

+ unsigned BuiltinOp = II->getBuiltinID();

+ return BuiltinOp != 0 &&

+ this->getBuiltinInfo().isConstantEvaluated(BuiltinOp);

+ });

} else if (II == Ident__is_identifier) {

EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,

[](Token &Tok, bool &HasLexedNextToken) -> int {

@@ -1877,6 +1951,22 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {

Tok, *this, diag::err_feature_check_malformed);

return II && isTargetEnvironment(getTargetInfo(), II);

});

+ } else if (II == Ident__is_target_variant_os) {

+ EvaluateFeatureLikeBuiltinMacro(

+ OS, Tok, II, *this, false,

+ [this](Token &Tok, bool &HasLexedNextToken) -> int {

+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(

+ Tok, *this, diag::err_feature_check_malformed);

+ return II && isTargetVariantOS(getTargetInfo(), II);

+ });

+ } else if (II == Ident__is_target_variant_environment) {

+ EvaluateFeatureLikeBuiltinMacro(

+ OS, Tok, II, *this, false,

+ [this](Token &Tok, bool &HasLexedNextToken) -> int {

+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(

+ Tok, *this, diag::err_feature_check_malformed);

+ return II && isTargetVariantEnvironment(getTargetInfo(), II);

+ });

} else {

llvm_unreachable("Unknown identifier!");

}

diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index fb4f2dc45758..4da9d1603770 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp

@@ -48,6 +48,7 @@

#include <cstddef>

#include <cstdint>

#include <limits>

+#include <optional>

#include <string>

#include <utility>

#include <vector>

@@ -527,7 +528,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {

return;

// Search include directories for this file.

- Optional<FileEntryRef> File =

+ OptionalFileEntryRef File =

LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr,

nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);

if (!File) {

@@ -1043,7 +1044,7 @@ struct PragmaDebugHandler : public PragmaHandler {

Token Tok;

PP.LexUnexpandedToken(Tok);

if (Tok.isNot(tok::identifier)) {

- PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid);

+ PP.Diag(Tok, diag::warn_pragma_debug_missing_command);

return;

}

IdentifierInfo *II = Tok.getIdentifierInfo();

@@ -1181,6 +1182,23 @@ struct PragmaDebugHandler : public PragmaHandler {

PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)

<< DumpII->getName();

}

+ } else if (II->isStr("sloc_usage")) {

+ // An optional integer literal argument specifies the number of files to

+ // specifically report information about.

+ std::optional<unsigned> MaxNotes;

+ Token ArgToken;

+ PP.Lex(ArgToken);

+ uint64_t Value;

+ if (ArgToken.is(tok::numeric_constant) &&

+ PP.parseSimpleIntegerLiteral(ArgToken, Value)) {

+ MaxNotes = Value;

+ } else if (ArgToken.isNot(tok::eod)) {

+ PP.Diag(ArgToken, diag::warn_pragma_debug_unexpected_argument);

+ }

+ PP.Diag(Tok, diag::remark_sloc_usage);

+ PP.getSourceManager().noteSLocAddressSpaceUsage(PP.getDiagnostics(),

+ MaxNotes);

} else {

PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command)

<< II->getName();

@@ -1940,6 +1958,15 @@ struct PragmaRegionHandler : public PragmaHandler {

}

};

+/// "\#pragma managed"

+/// "\#pragma managed(...)"

+/// "\#pragma unmanaged"

+/// MSVC ignores this pragma when not compiling using /clr, which clang doesn't

+/// support. We parse it and ignore it to avoid -Wunknown-pragma warnings.

+struct PragmaManagedHandler : public EmptyPragmaHandler {

+ PragmaManagedHandler(const char *pragma) : EmptyPragmaHandler(pragma) {}

+};

/// This handles parsing pragmas that take a macro name and optional message

static IdentifierInfo *HandleMacroAnnotationPragma(Preprocessor &PP, Token &Tok,

const char *Pragma,

@@ -2112,6 +2139,8 @@ void Preprocessor::RegisterBuiltinPragmas() {

AddPragmaHandler(new PragmaIncludeAliasHandler());

AddPragmaHandler(new PragmaHdrstopHandler());

AddPragmaHandler(new PragmaSystemHeaderHandler());

+ AddPragmaHandler(new PragmaManagedHandler("managed"));

+ AddPragmaHandler(new PragmaManagedHandler("unmanaged"));

}

// Pragmas added by plugins

diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp
index 2146a7c04217..85eb57f61611 100644
--- a/clang/lib/Lex/PreprocessingRecord.cpp
+++ b/clang/lib/Lex/PreprocessingRecord.cpp

@@ -20,7 +20,6 @@

#include "clang/Lex/MacroInfo.h"

#include "clang/Lex/Token.h"

#include "llvm/ADT/DenseMap.h"

-#include "llvm/ADT/Optional.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/ADT/iterator_range.h"

#include "llvm/Support/Capacity.h"

@@ -31,6 +30,7 @@

#include <cstddef>

#include <cstring>

#include <iterator>

+#include <optional>

#include <utility>

#include <vector>

@@ -42,7 +42,7 @@ ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() =

InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec,

InclusionKind Kind, StringRef FileName,

bool InQuotes, bool ImportedModule,

- Optional<FileEntryRef> File,

+ OptionalFileEntryRef File,

SourceRange Range)

: PreprocessingDirective(InclusionDirectiveKind, Range), InQuotes(InQuotes),

Kind(Kind), ImportedModule(ImportedModule), File(File) {

@@ -112,10 +112,9 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {

// See if the external source can see if the entity is in the file without

// deserializing it.

- Optional<bool> IsInFile =

- ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID);

- if (IsInFile)

- return IsInFile.value();

+ if (std::optional<bool> IsInFile =

+ ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID))

+ return *IsInFile;

// The external source did not provide a definite answer, go and deserialize

// the entity to check it.

@@ -476,15 +475,9 @@ void PreprocessingRecord::MacroUndefined(const Token &Id,

}

void PreprocessingRecord::InclusionDirective(

- SourceLocation HashLoc,

- const Token &IncludeTok,

- StringRef FileName,

- bool IsAngled,

- CharSourceRange FilenameRange,

- Optional<FileEntryRef> File,

- StringRef SearchPath,

- StringRef RelativePath,

- const Module *Imported,

+ SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,

+ bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,

+ StringRef SearchPath, StringRef RelativePath, const Module *Imported,

SrcMgr::CharacteristicKind FileType) {

InclusionDirective::InclusionKind Kind = InclusionDirective::Include;

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 5310db3c882b..fe9adb5685e3 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp

@@ -58,7 +58,6 @@

#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/StringRef.h"

-#include "llvm/ADT/StringSwitch.h"

#include "llvm/Support/Capacity.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MemoryBuffer.h"

@@ -66,6 +65,7 @@

#include <algorithm>

#include <cassert>

#include <memory>

+#include <optional>

#include <string>

#include <utility>

#include <vector>

@@ -166,12 +166,6 @@ Preprocessor::~Preprocessor() {

IncludeMacroStack.clear();

- // Destroy any macro definitions.

- while (MacroInfoChain *I = MIChainHead) {

- MIChainHead = I->Next;

- I->~MacroInfoChain();

- }

// Free any cached macro expanders.

// This populates MacroArgCache, so all TokenLexers need to be destroyed

// before the code below that frees up the MacroArgCache list.

@@ -406,7 +400,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,

assert(!CodeCompletionFile && "Already set");

// Load the actual file's contents.

- Optional<llvm::MemoryBufferRef> Buffer =

+ std::optional<llvm::MemoryBufferRef> Buffer =

SourceMgr.getMemoryBufferForFileOrNone(File);

if (!Buffer)

return true;

@@ -535,6 +529,13 @@ Module *Preprocessor::getCurrentModule() {

return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);

}

+Module *Preprocessor::getCurrentModuleImplementation() {

+ if (!getLangOpts().isCompilingModuleImplementation())

+ return nullptr;

+ return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);

//===----------------------------------------------------------------------===//

// Preprocessor Initialization Methods

//===----------------------------------------------------------------------===//

@@ -580,7 +581,7 @@ void Preprocessor::EnterMainSourceFile() {

if (!PPOpts->PCHThroughHeader.empty()) {

// Lookup and save the FileID for the through header. If it isn't found

// in the search path, it's a fatal error.

- Optional<FileEntryRef> File = LookupFile(

+ OptionalFileEntryRef File = LookupFile(

SourceLocation(), PPOpts->PCHThroughHeader,

/*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,

/*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,

@@ -773,29 +774,6 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {

Diag(Identifier,it->second) << Identifier.getIdentifierInfo();

}

-/// Returns a diagnostic message kind for reporting a future keyword as

-/// appropriate for the identifier and specified language.

-static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,

- const LangOptions &LangOpts) {

- assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

- if (LangOpts.CPlusPlus)

- return llvm::StringSwitch<diag::kind>(II.getName())

-#define CXX11_KEYWORD(NAME, FLAGS) \

- .Case(#NAME, diag::warn_cxx11_keyword)

-#define CXX20_KEYWORD(NAME, FLAGS) \

- .Case(#NAME, diag::warn_cxx20_keyword)

-#include "clang/Basic/TokenKinds.def"

- // char8_t is not modeled as a CXX20_KEYWORD because it's not

- // unconditionally enabled in C++20 mode. (It can be disabled

- // by -fno-char8_t.)

- .Case("char8_t", diag::warn_cxx20_keyword)

- ;

- llvm_unreachable(

- "Keyword not known to come from a newer Standard or proposed Standard");

void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {

assert(II.isOutOfDate() && "not out of date");

getExternalSource()->updateOutOfDateIdentifier(II);

@@ -867,7 +845,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {

// FIXME: This warning is disabled in cases where it shouldn't be, like

// "#define constexpr constexpr", "int constexpr;"

if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {

- Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))

+ Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))

<< II.getName();

// Don't diagnose this keyword again in this translation unit.

II.setIsFutureCompatKeyword(false);

@@ -894,7 +872,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {

(getLangOpts().Modules || getLangOpts().DebuggerSupport) &&

CurLexerKind != CLK_CachingLexer) {

ModuleImportLoc = Identifier.getLocation();

- ModuleImportPath.clear();

+ NamedModuleImportPath.clear();

ModuleImportExpectsIdentifier = true;

CurLexerKind = CLK_LexAfterModuleImport;

}

@@ -939,57 +917,57 @@ void Preprocessor::Lex(Token &Result) {

Result.setIdentifierInfo(nullptr);

}

- // Update ImportSeqState to track our position within a C++20 import-seq

+ // Update StdCXXImportSeqState to track our position within a C++20 import-seq

// if this token is being produced as a result of phase 4 of translation.

// Update TrackGMFState to decide if we are currently in a Global Module

- // Fragment. GMF state updates should precede ImportSeq ones, since GMF state

- // depends on the prevailing ImportSeq state in two cases.

+ // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state

+ // depends on the prevailing StdCXXImportSeq state in two cases.

if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&

!Result.getFlag(Token::IsReinjected)) {

switch (Result.getKind()) {

case tok::l_paren: case tok::l_square: case tok::l_brace:

- ImportSeqState.handleOpenBracket();

+ StdCXXImportSeqState.handleOpenBracket();

break;

case tok::r_paren: case tok::r_square:

- ImportSeqState.handleCloseBracket();

+ StdCXXImportSeqState.handleCloseBracket();

break;

case tok::r_brace:

- ImportSeqState.handleCloseBrace();

+ StdCXXImportSeqState.handleCloseBrace();

break;

// This token is injected to represent the translation of '#include "a.h"'

// into "import a.h;". Mimic the notional ';'.

case tok::annot_module_include:

case tok::semi:

TrackGMFState.handleSemi();

- ImportSeqState.handleSemi();

+ StdCXXImportSeqState.handleSemi();

break;

case tok::header_name:

case tok::annot_header_unit:

- ImportSeqState.handleHeaderName();

+ StdCXXImportSeqState.handleHeaderName();

break;

case tok::kw_export:

TrackGMFState.handleExport();

- ImportSeqState.handleExport();

+ StdCXXImportSeqState.handleExport();

break;

case tok::identifier:

if (Result.getIdentifierInfo()->isModulesImport()) {

- TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());

- ImportSeqState.handleImport();

- if (ImportSeqState.afterImportSeq()) {

+ TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());

+ StdCXXImportSeqState.handleImport();

+ if (StdCXXImportSeqState.afterImportSeq()) {

ModuleImportLoc = Result.getLocation();

- ModuleImportPath.clear();

+ NamedModuleImportPath.clear();

ModuleImportExpectsIdentifier = true;

CurLexerKind = CLK_LexAfterModuleImport;

}

break;

} else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {

- TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());

+ TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());

break;

}

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

default:

TrackGMFState.handleMisc();

- ImportSeqState.handleMisc();

+ StdCXXImportSeqState.handleMisc();

break;

}

@@ -1170,7 +1148,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {

// For now, we only support header-name imports in C++20 mode.

// FIXME: Should we allow this in all language modes that support an import

// declaration as an extension?

- if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {

+ if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {

if (LexHeaderName(Result))

return true;

} else {

@@ -1232,7 +1210,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {

Suffix.back().setLocation(SemiLoc);

Suffix.back().setAnnotationEndLoc(SemiLoc);

Suffix.back().setAnnotationValue(Action.ModuleForHeader);

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case ImportAction::ModuleImport:

case ImportAction::HeaderUnitImport:

@@ -1266,7 +1244,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {

if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {

// We expected to see an identifier here, and we did; continue handling

// identifiers.

- ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),

+ NamedModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),

Result.getLocation()));

ModuleImportExpectsIdentifier = false;

CurLexerKind = CLK_LexAfterModuleImport;

@@ -1283,7 +1261,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {

}

// If we didn't recognize a module name at all, this is not a (valid) import.

- if (ModuleImportPath.empty() || Result.is(tok::eof))

+ if (NamedModuleImportPath.empty() || Result.is(tok::eof))

return true;

// Consume the pp-import-suffix and expand any macros in it now, if we're not

@@ -1306,28 +1284,28 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {

// FIXME: Is this the right level to be performing this transformation?

std::string FlatModuleName;

if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {

- for (auto &Piece : ModuleImportPath) {

+ for (auto &Piece : NamedModuleImportPath) {

if (!FlatModuleName.empty())

FlatModuleName += ".";

FlatModuleName += Piece.first->getName();

}

- SourceLocation FirstPathLoc = ModuleImportPath[0].second;

- ModuleImportPath.clear();

- ModuleImportPath.push_back(

+ SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;

+ NamedModuleImportPath.clear();

+ NamedModuleImportPath.push_back(

std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));

}

Module *Imported = nullptr;

if (getLangOpts().Modules) {

Imported = TheModuleLoader.loadModule(ModuleImportLoc,

- ModuleImportPath,

+ NamedModuleImportPath,

Module::Hidden,

/*IsInclusionDirective=*/false);

if (Imported)

makeModuleVisible(Imported, SemiLoc);

}

if (Callbacks)

- Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);

+ Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);

if (!Suffix.empty()) {

EnterTokens(Suffix);

diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index f6b005d9e19c..1b3201bd805b 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp

@@ -240,7 +240,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,

// it as an identifier.

if (!PrevTok.hasUDSuffix())

return false;

- LLVM_FALLTHROUGH;

+ [[fallthrough]];

case tok::identifier: // id+id or id+number or id+L"foo".

// id+'.'... will not append.

if (Tok.is(tok::numeric_constant))

diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index efda6d0046fa..c6968b9f417e 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp

@@ -25,11 +25,13 @@

#include "clang/Lex/Token.h"

#include "clang/Lex/VariadicMacroSupport.h"

#include "llvm/ADT/ArrayRef.h"

+#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/iterator_range.h"

#include <cassert>

#include <cstring>

+#include <optional>

using namespace clang;

@@ -203,7 +205,7 @@ void TokenLexer::stringifyVAOPTContents(

assert(CurTokenIdx != 0 &&

"Can not have __VAOPT__ contents begin with a ##");

Token &LHS = VAOPTTokens[CurTokenIdx - 1];

- pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens),

+ pasteTokens(LHS, llvm::ArrayRef(VAOPTTokens, NumVAOptTokens),

CurTokenIdx);

// Replace the token prior to the first ## in this iteration.

ConcatenatedVAOPTResultToks.back() = LHS;

@@ -247,7 +249,7 @@ void TokenLexer::ExpandFunctionArguments() {

// we install the newly expanded sequence as the new 'Tokens' list.

bool MadeChange = false;

- Optional<bool> CalledWithVariadicArguments;

+ std::optional<bool> CalledWithVariadicArguments;

VAOptExpansionContext VCtx(PP);

@@ -721,7 +723,7 @@ bool TokenLexer::Lex(Token &Tok) {

}

bool TokenLexer::pasteTokens(Token &Tok) {

- return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);

+ return pasteTokens(Tok, llvm::ArrayRef(Tokens, NumTokens), CurTokenIdx);

}

/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##

@@ -984,65 +986,71 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {

/// \arg begin_tokens will be updated to a position past all the found

/// consecutive tokens.

static void updateConsecutiveMacroArgTokens(SourceManager &SM,

- SourceLocation InstLoc,

+ SourceLocation ExpandLoc,

Token *&begin_tokens,

Token * end_tokens) {

- assert(begin_tokens < end_tokens);

- SourceLocation FirstLoc = begin_tokens->getLocation();

- SourceLocation CurLoc = FirstLoc;

- // Compare the source location offset of tokens and group together tokens that

- // are close, even if their locations point to different FileIDs. e.g.

- //

- // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)

- // ^ ^

- // |bar foo cake| (one SLocEntry chunk for all tokens)

- //

- // we can perform this "merge" since the token's spelling location depends

- // on the relative offset.

- Token *NextTok = begin_tokens + 1;

- for (; NextTok < end_tokens; ++NextTok) {

- SourceLocation NextLoc = NextTok->getLocation();

- if (CurLoc.isFileID() != NextLoc.isFileID())

- break; // Token from different kind of FileID.

- SourceLocation::IntTy RelOffs;

- if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))

- break; // Token from different local/loaded location.

- // Check that token is not before the previous token or more than 50

- // "characters" away.

- if (RelOffs < 0 || RelOffs > 50)

- break;

- if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))

- break; // Token from a different macro.

- CurLoc = NextLoc;

+ assert(begin_tokens + 1 < end_tokens);

+ SourceLocation BeginLoc = begin_tokens->getLocation();

+ llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);

+ llvm::MutableArrayRef<Token> Partition;

+ auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {

+ // The maximum distance between two consecutive tokens in a partition.

+ // This is an important trick to avoid using too much SourceLocation address

+ // space!

+ static constexpr SourceLocation::IntTy MaxDistance = 50;

+ auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();

+ Last = Loc;

+ return Distance <= MaxDistance;

+ };

+ // Partition the tokens by their FileID.

+ // This is a hot function, and calling getFileID can be expensive, the

+ // implementation is optimized by reducing the number of getFileID.

+ if (BeginLoc.isFileID()) {

+ // Consecutive tokens not written in macros must be from the same file.

+ // (Neither #include nor eof can occur inside a macro argument.)

+ Partition = All.take_while([&](const Token &T) {

+ return T.getLocation().isFileID() && NearLast(T.getLocation());

+ });

+ } else {

+ // Call getFileID once to calculate the bounds, and use the cheaper

+ // sourcelocation-against-bounds comparison.

+ FileID BeginFID = SM.getFileID(BeginLoc);

+ SourceLocation Limit =

+ SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));

+ Partition = All.take_while([&](const Token &T) {

+ return T.getLocation() >= BeginLoc && T.getLocation() < Limit &&

+ NearLast(T.getLocation());

+ });

}

+ assert(!Partition.empty());

// For the consecutive tokens, find the length of the SLocEntry to contain

// all of them.

- Token &LastConsecutiveTok = *(NextTok-1);

- SourceLocation::IntTy LastRelOffs = 0;

- SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),

- &LastRelOffs);

SourceLocation::UIntTy FullLength =

- LastRelOffs + LastConsecutiveTok.getLength();

+ Partition.back().getEndLoc().getRawEncoding() -

+ Partition.front().getLocation().getRawEncoding();

// Create a macro expansion SLocEntry that will "contain" all of the tokens.

SourceLocation Expansion =

- SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);

+ SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);

+#ifdef EXPENSIVE_CHECKS

+ assert(llvm::all_of(Partition.drop_front(),

+ [&SM, ID = SM.getFileID(Partition.front().getLocation())](

+ const Token &T) {

+ return ID == SM.getFileID(T.getLocation());

+ }) &&

+ "Must have the same FIleID!");

+#endif

// Change the location of the tokens from the spelling location to the new

// expanded location.

- for (; begin_tokens < NextTok; ++begin_tokens) {

- Token &Tok = *begin_tokens;

- SourceLocation::IntTy RelOffs = 0;

- SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);

- Tok.setLocation(Expansion.getLocWithOffset(RelOffs));

+ for (Token& T : Partition) {

+ SourceLocation::IntTy RelativeOffset =

+ T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();

+ T.setLocation(Expansion.getLocWithOffset(RelativeOffset));

}

+ begin_tokens = &Partition.back() + 1;

}

/// Creates SLocEntries and updates the locations of macro argument

@@ -1055,7 +1063,7 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,

Token *end_tokens) {

SourceManager &SM = PP.getSourceManager();

- SourceLocation InstLoc =

+ SourceLocation ExpandLoc =

getExpansionLocForMacroDefLoc(ArgIdSpellLoc);

while (begin_tokens < end_tokens) {

@@ -1063,12 +1071,12 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,

if (end_tokens - begin_tokens == 1) {

Token &Tok = *begin_tokens;

Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),

- InstLoc,

+ ExpandLoc,

Tok.getLength()));

return;

}

- updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);

+ updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);

}

diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h
index e79a85bc72b7..5316d2540b76 100644
--- a/clang/lib/Lex/UnicodeCharSets.h
+++ b/clang/lib/Lex/UnicodeCharSets.h

@@ -10,7 +10,7 @@

#include "llvm/Support/UnicodeCharRanges.h"

-// Unicode 14 XID_Start

+// Unicode 15.0 XID_Start

static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {

{0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA},

{0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},

@@ -170,69 +170,72 @@ static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {

{0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172},

{0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4},

{0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211},

- {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},

- {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8},

- {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310},

- {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},

- {0x11335, 0x11339}, {0x1133D, 0x1133D}, {0x11350, 0x11350},

- {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A},

- {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5},

- {0x114C7, 0x114C7}, {0x11580, 0x115AE}, {0x115D8, 0x115DB},

- {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA},

- {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11740, 0x11746},

- {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906},

- {0x11909, 0x11909}, {0x1190C, 0x11913}, {0x11915, 0x11916},

- {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941},

- {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1},

- {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32},

- {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89},

- {0x11A9D, 0x11A9D}, {0x11AB0, 0x11AF8}, {0x11C00, 0x11C08},

- {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},

- {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30},

- {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68},

- {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2},

- {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, {0x12400, 0x1246E},

- {0x12480, 0x12543}, {0x12F90, 0x12FF0}, {0x13000, 0x1342E},

- {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E},

- {0x16A70, 0x16ABE}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F},

- {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F},

- {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50},

- {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3},

- {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},

- {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},

- {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},

- {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},

- {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454},

- {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2},

- {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},

- {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505},

- {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C},

- {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544},

- {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5},

- {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},

- {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E},

- {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8},

- {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1DF00, 0x1DF1E},

+ {0x11213, 0x1122B}, {0x1123F, 0x11240}, {0x11280, 0x11286},

+ {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},

+ {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C},

+ {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330},

+ {0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133D, 0x1133D},

+ {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434},

+ {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF},

+ {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE},

+ {0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644},

+ {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A},

+ {0x11740, 0x11746}, {0x11800, 0x1182B}, {0x118A0, 0x118DF},

+ {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913},

+ {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F},

+ {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0},

+ {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00},

+ {0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50},

+ {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AB0, 0x11AF8},

+ {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40},

+ {0x11C72, 0x11C8F}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},

+ {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65},

+ {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98},

+ {0x11EE0, 0x11EF2}, {0x11F02, 0x11F02}, {0x11F04, 0x11F10},

+ {0x11F12, 0x11F33}, {0x11FB0, 0x11FB0}, {0x12000, 0x12399},

+ {0x12400, 0x1246E}, {0x12480, 0x12543}, {0x12F90, 0x12FF0},

+ {0x13000, 0x1342F}, {0x13441, 0x13446}, {0x14400, 0x14646},

+ {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A70, 0x16ABE},

+ {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43},

+ {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F},

+ {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F},

+ {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7},

+ {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3},

+ {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122},

+ {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155},

+ {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A},

+ {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99},

+ {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F},

+ {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC},

+ {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3},

+ {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},

+ {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E},

+ {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550},

+ {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA},

+ {0x1D6DC, 0x1D6FA}, {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734},

+ {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788},

+ {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB},

+ {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, {0x1E030, 0x1E06D},

{0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, {0x1E14E, 0x1E14E},

- {0x1E290, 0x1E2AD}, {0x1E2C0, 0x1E2EB}, {0x1E7E0, 0x1E7E6},

- {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},

- {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B},

- {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22},

- {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32},

- {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B},

- {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49},

- {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},

- {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59},

- {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F},

- {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A},

- {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C},

- {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B},

- {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB},

- {0x20000, 0x2A6DF}, {0x2A700, 0x2B738}, {0x2B740, 0x2B81D},

- {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D},

- {0x30000, 0x3134A}};

-// Unicode 14 XID_Continue, excluding XID_Start

+ {0x1E290, 0x1E2AD}, {0x1E2C0, 0x1E2EB}, {0x1E4D0, 0x1E4EB},

+ {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE},

+ {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943},

+ {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F},

+ {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},

+ {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39},

+ {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47},

+ {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F},

+ {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57},

+ {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},

+ {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64},

+ {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77},

+ {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89},

+ {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},

+ {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DF}, {0x2A700, 0x2B739},

+ {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0},

+ {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, {0x31350, 0x323AF}};

+// Unicode 15.0 XID_Continue, excluding XID_Start

// The Unicode Property XID_Continue is a super set of XID_Start.

// To save Space, the table below only contains the codepoints

// that are not also in XID_Start.

@@ -268,64 +271,65 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {

{0x0C66, 0x0C6F}, {0x0C81, 0x0C83}, {0x0CBC, 0x0CBC},

{0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},

{0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3}, {0x0CE6, 0x0CEF},

- {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C}, {0x0D3E, 0x0D44},

- {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},

- {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, {0x0D81, 0x0D83},

- {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6},

- {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF3},

- {0x0E31, 0x0E31}, {0x0E33, 0x0E3A}, {0x0E47, 0x0E4E},

- {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1}, {0x0EB3, 0x0EBC},

- {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, {0x0F18, 0x0F19},

- {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},

- {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, {0x0F71, 0x0F84},

- {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, {0x0F99, 0x0FBC},

- {0x0FC6, 0x0FC6}, {0x102B, 0x103E}, {0x1040, 0x1049},

- {0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064},

- {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D},

- {0x108F, 0x109D}, {0x135D, 0x135F}, {0x1369, 0x1371},

- {0x1712, 0x1715}, {0x1732, 0x1734}, {0x1752, 0x1753},

- {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD},

- {0x17E0, 0x17E9}, {0x180B, 0x180D}, {0x180F, 0x1819},

- {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B},

- {0x1946, 0x194F}, {0x19D0, 0x19DA}, {0x1A17, 0x1A1B},

- {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},

- {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, {0x1ABF, 0x1ACE},

- {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B50, 0x1B59},

- {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD},

- {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37},

- {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x1CD0, 0x1CD2},

- {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4},

- {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF}, {0x203F, 0x2040},

- {0x2054, 0x2054}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},

- {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},

- {0x2DE0, 0x2DFF}, {0x302A, 0x302F}, {0x3099, 0x309A},

- {0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D},

- {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802},

- {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827},

- {0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5},

- {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909},

- {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983},

- {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5},

- {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},

- {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D},

- {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},

- {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF},

- {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED},

- {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},

- {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F},

- {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF9E, 0xFF9F},

- {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A},

- {0x104A0, 0x104A9}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06},

- {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},

- {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10D30, 0x10D39},

- {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85},

- {0x11000, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x11070},

- {0x11073, 0x11074}, {0x1107F, 0x11082}, {0x110B0, 0x110BA},

- {0x110C2, 0x110C2}, {0x110F0, 0x110F9}, {0x11100, 0x11102},

- {0x11127, 0x11134}, {0x11136, 0x1113F}, {0x11145, 0x11146},

- {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0},

- {0x111C9, 0x111CC}, {0x111CE, 0x111D9}, {0x1122C, 0x11237},

- {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x112F0, 0x112F9},

+ {0x0CF3, 0x0CF3}, {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C},

+ {0x0D3E, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D},

+ {0x0D57, 0x0D57}, {0x0D62, 0x0D63}, {0x0D66, 0x0D6F},

+ {0x0D81, 0x0D83}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},

+ {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},

+ {0x0DF2, 0x0DF3}, {0x0E31, 0x0E31}, {0x0E33, 0x0E3A},

+ {0x0E47, 0x0E4E}, {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1},

+ {0x0EB3, 0x0EBC}, {0x0EC8, 0x0ECE}, {0x0ED0, 0x0ED9},

+ {0x0F18, 0x0F19}, {0x0F20, 0x0F29}, {0x0F35, 0x0F35},

+ {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F},

+ {0x0F71, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97},

+ {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x103E},

+ {0x1040, 0x1049}, {0x1056, 0x1059}, {0x105E, 0x1060},

+ {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074},

+ {0x1082, 0x108D}, {0x108F, 0x109D}, {0x135D, 0x135F},

+ {0x1369, 0x1371}, {0x1712, 0x1715}, {0x1732, 0x1734},

+ {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3},

+ {0x17DD, 0x17DD}, {0x17E0, 0x17E9}, {0x180B, 0x180D},

+ {0x180F, 0x1819}, {0x18A9, 0x18A9}, {0x1920, 0x192B},

+ {0x1930, 0x193B}, {0x1946, 0x194F}, {0x19D0, 0x19DA},

+ {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C},

+ {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD},

+ {0x1ABF, 0x1ACE}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44},

+ {0x1B50, 0x1B59}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82},

+ {0x1BA1, 0x1BAD}, {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3},

+ {0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59},

+ {0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED},

+ {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF},

+ {0x203F, 0x2040}, {0x2054, 0x2054}, {0x20D0, 0x20DC},

+ {0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1},

+ {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},

+ {0x3099, 0x309A}, {0xA620, 0xA629}, {0xA66F, 0xA66F},

+ {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1},

+ {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B},

+ {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},

+ {0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1},

+ {0xA8FF, 0xA909}, {0xA926, 0xA92D}, {0xA947, 0xA953},

+ {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9},

+ {0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36},

+ {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59},

+ {0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},

+ {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},

+ {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA},

+ {0xABEC, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E},

+ {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xFE33, 0xFE34},

+ {0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F},

+ {0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0},

+ {0x10376, 0x1037A}, {0x104A0, 0x104A9}, {0x10A01, 0x10A03},

+ {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},

+ {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27},

+ {0x10D30, 0x10D39}, {0x10EAB, 0x10EAC}, {0x10EFD, 0x10EFF},

+ {0x10F46, 0x10F50}, {0x10F82, 0x10F85}, {0x11000, 0x11002},

+ {0x11038, 0x11046}, {0x11066, 0x11070}, {0x11073, 0x11074},

+ {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x110C2, 0x110C2},

+ {0x110F0, 0x110F9}, {0x11100, 0x11102}, {0x11127, 0x11134},

+ {0x11136, 0x1113F}, {0x11145, 0x11146}, {0x11173, 0x11173},

+ {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC},

+ {0x111CE, 0x111D9}, {0x1122C, 0x11237}, {0x1123E, 0x1123E},

+ {0x11241, 0x11241}, {0x112DF, 0x112EA}, {0x112F0, 0x112F9},

{0x11300, 0x11303}, {0x1133B, 0x1133C}, {0x1133E, 0x11344},

{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357},

{0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},

@@ -344,21 +348,54 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {

{0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},

{0x11D3F, 0x11D45}, {0x11D47, 0x11D47}, {0x11D50, 0x11D59},

{0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97},

- {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF6}, {0x16A60, 0x16A69},

- {0x16AC0, 0x16AC9}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36},

- {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87},

- {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1},

- {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46},

- {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182},

- {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},

- {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C},

- {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F},

- {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},

- {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},

- {0x1E130, 0x1E136}, {0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE},

- {0x1E2EC, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},

+ {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF6}, {0x11F00, 0x11F01},

+ {0x11F03, 0x11F03}, {0x11F34, 0x11F3A}, {0x11F3E, 0x11F42},

+ {0x11F50, 0x11F59}, {0x13440, 0x13440}, {0x13447, 0x13455},

+ {0x16A60, 0x16A69}, {0x16AC0, 0x16AC9}, {0x16AF0, 0x16AF4},

+ {0x16B30, 0x16B36}, {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F},

+ {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4},

+ {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D},

+ {0x1CF30, 0x1CF46}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172},

+ {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD},

+ {0x1D242, 0x1D244}, {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36},

+ {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84},

+ {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},

+ {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},

+ {0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F}, {0x1E130, 0x1E136},

+ {0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2F9},

+ {0x1E4EC, 0x1E4F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},

{0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}};

+// Clang supports the "Mathematical notation profile" as an extension,

+// as described in https://www.unicode.org/L2/L2022/22230-math-profile.pdf

+// Math_Start

+static const llvm::sys::UnicodeCharRange

+ MathematicalNotationProfileIDStartRanges[] = {

+ {0x02202, 0x02202}, // ∂

+ {0x02207, 0x02207}, // ∇

+ {0x0221E, 0x0221E}, // ∞

+ {0x1D6C1, 0x1D6C1}, // 𝛁

+ {0x1D6DB, 0x1D6DB}, // 𝛛

+ {0x1D6FB, 0x1D6FB}, // 𝛻

+ {0x1D715, 0x1D715}, // 𝜕

+ {0x1D735, 0x1D735}, // 𝜵

+ {0x1D74F, 0x1D74F}, // 𝝏

+ {0x1D76F, 0x1D76F}, // 𝝯

+ {0x1D789, 0x1D789}, // 𝞉

+ {0x1D7A9, 0x1D7A9}, // 𝞩

+ {0x1D7C3, 0x1D7C3}, // 𝟃

+};

+// Math_Continue

+static const llvm::sys::UnicodeCharRange

+ MathematicalNotationProfileIDContinueRanges[] = {

+ {0x000B2, 0x000B3}, // ²-³

+ {0x000B9, 0x000B9}, // ¹

+ {0x02070, 0x02070}, // ⁰

+ {0x02074, 0x0207E}, // ⁴-⁾

+ {0x02080, 0x0208E}, // ₀-₎

+};

// C11 D.1, C++11 [charname.allowed]

static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = {

// 1