diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /clang/lib/Lex | |
parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) |
Diffstat (limited to 'clang/lib/Lex')
-rw-r--r-- | clang/lib/Lex/DependencyDirectivesScanner.cpp | 94 | ||||
-rw-r--r-- | clang/lib/Lex/HeaderMap.cpp | 21 | ||||
-rw-r--r-- | clang/lib/Lex/HeaderSearch.cpp | 220 | ||||
-rw-r--r-- | clang/lib/Lex/InitHeaderSearch.cpp | 146 | ||||
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 283 | ||||
-rw-r--r-- | clang/lib/Lex/LiteralSupport.cpp | 36 | ||||
-rw-r--r-- | clang/lib/Lex/MacroArgs.cpp | 6 | ||||
-rw-r--r-- | clang/lib/Lex/MacroInfo.cpp | 12 | ||||
-rw-r--r-- | clang/lib/Lex/ModuleMap.cpp | 192 | ||||
-rw-r--r-- | clang/lib/Lex/PPCallbacks.cpp | 5 | ||||
-rw-r--r-- | clang/lib/Lex/PPDirectives.cpp | 119 | ||||
-rw-r--r-- | clang/lib/Lex/PPExpressions.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Lex/PPLexerChange.cpp | 7 | ||||
-rw-r--r-- | clang/lib/Lex/PPMacroExpansion.cpp | 144 | ||||
-rw-r--r-- | clang/lib/Lex/Pragma.cpp | 33 | ||||
-rw-r--r-- | clang/lib/Lex/PreprocessingRecord.cpp | 23 | ||||
-rw-r--r-- | clang/lib/Lex/Preprocessor.cpp | 98 | ||||
-rw-r--r-- | clang/lib/Lex/TokenConcatenation.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Lex/TokenLexer.cpp | 114 | ||||
-rw-r--r-- | clang/lib/Lex/UnicodeCharSets.h | 305 |
20 files changed, 1084 insertions, 778 deletions
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 567ca81f6ac2..0adbaa36bf7c 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include <optional> using namespace clang; using namespace clang::dependency_directives_scan; @@ -81,46 +82,49 @@ struct Scanner { private: /// Lexes next token and advances \p First and the \p Lexer. - LLVM_NODISCARD dependency_directives_scan::Token & + [[nodiscard]] dependency_directives_scan::Token & lexToken(const char *&First, const char *const End); dependency_directives_scan::Token &lexIncludeFilename(const char *&First, const char *const End); + void skipLine(const char *&First, const char *const End); + void skipDirective(StringRef Name, const char *&First, const char *const End); + /// Lexes next token and if it is identifier returns its string, otherwise - /// it skips the current line and returns \p None. + /// it skips the current line and returns \p std::nullopt. /// /// In any case (whatever the token kind) \p First and the \p Lexer will /// advance beyond the token. - LLVM_NODISCARD Optional<StringRef> + [[nodiscard]] std::optional<StringRef> tryLexIdentifierOrSkipLine(const char *&First, const char *const End); /// Used when it is certain that next token is an identifier. - LLVM_NODISCARD StringRef lexIdentifier(const char *&First, - const char *const End); + [[nodiscard]] StringRef lexIdentifier(const char *&First, + const char *const End); /// Lexes next token and returns true iff it is an identifier that matches \p /// Id, otherwise it skips the current line and returns false. /// /// In any case (whatever the token kind) \p First and the \p Lexer will /// advance beyond the token. 
- LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id, - const char *&First, - const char *const End); - - LLVM_NODISCARD bool scanImpl(const char *First, const char *const End); - LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); - LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); - LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First, + [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, + const char *&First, + const char *const End); + + [[nodiscard]] bool scanImpl(const char *First, const char *const End); + [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); + [[nodiscard]] bool lexAt(const char *&First, const char *const End); + [[nodiscard]] bool lexModule(const char *&First, const char *const End); + [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, + const char *const End); + [[nodiscard]] bool lexPragma(const char *&First, const char *const End); + [[nodiscard]] bool lexEndif(const char *&First, const char *const End); + [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, const char *const End); - LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); - LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); - LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First, - const char *const End); - LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind, - const char *&First, - const char *const End); + [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, + const char *&First, + const char *const End); void lexPPDirectiveBody(const char *&First, const char *const End); DirectiveWithTokens &pushDirective(DirectiveKind Kind) { @@ -150,6 +154,7 @@ private: DiagnosticsEngine *Diags; SourceLocation InputSourceLoc; + const char *LastTokenPtr = nullptr; /// Keeps track of the tokens for the 
currently lexed directive. Once a /// directive is fully lexed and "committed" then the tokens get appended to /// \p Tokens and \p CurDirToks is cleared for the next directive. @@ -177,8 +182,8 @@ static void skipOverSpaces(const char *&First, const char *const End) { ++First; } -LLVM_NODISCARD static bool isRawStringLiteral(const char *First, - const char *Current) { +[[nodiscard]] static bool isRawStringLiteral(const char *First, + const char *Current) { assert(First <= Current); // Check if we can even back up. @@ -364,7 +369,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start, return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); } -static void skipLine(const char *&First, const char *const End) { +void Scanner::skipLine(const char *&First, const char *const End) { for (;;) { assert(First <= End); if (First == End) @@ -379,6 +384,7 @@ static void skipLine(const char *&First, const char *const End) { // Iterate over strings correctly to avoid comments and newlines. if (*First == '"' || (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + LastTokenPtr = First; if (isRawStringLiteral(Start, First)) skipRawString(First, End); else @@ -388,6 +394,7 @@ static void skipLine(const char *&First, const char *const End) { // Iterate over comments correctly. 
if (*First != '/' || End - First < 2) { + LastTokenPtr = First; ++First; continue; } @@ -399,6 +406,7 @@ static void skipLine(const char *&First, const char *const End) { } if (First[1] != '*') { + LastTokenPtr = First; ++First; continue; } @@ -416,8 +424,8 @@ static void skipLine(const char *&First, const char *const End) { } } -static void skipDirective(StringRef Name, const char *&First, - const char *const End) { +void Scanner::skipDirective(StringRef Name, const char *&First, + const char *const End) { if (llvm::StringSwitch<bool>(Name) .Case("warning", true) .Case("error", true) @@ -517,13 +525,13 @@ void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { } } -LLVM_NODISCARD Optional<StringRef> +[[nodiscard]] std::optional<StringRef> Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { const dependency_directives_scan::Token &Tok = lexToken(First, End); if (Tok.isNot(tok::raw_identifier)) { if (!Tok.is(tok::eod)) skipLine(First, End); - return None; + return std::nullopt; } bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; @@ -548,14 +556,15 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { } StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { - Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); + std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); assert(Id && "expected identifier token"); - return Id.value(); + return *Id; } bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, const char *const End) { - if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) { + if (std::optional<StringRef> FoundId = + tryLexIdentifierOrSkipLine(First, End)) { if (*FoundId == Id) return true; skipLine(First, End); @@ -581,7 +590,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) { bool Export = false; if (Id == "export") { Export = true; - Optional<StringRef> 
NextId = tryLexIdentifierOrSkipLine(First, End); + std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); if (!NextId) return false; Id = *NextId; @@ -621,7 +630,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) { } bool Scanner::lexPragma(const char *&First, const char *const End) { - Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); + std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); if (!FoundId) return false; @@ -710,6 +719,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { return false; } + LastTokenPtr = First; + TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); auto ScEx1 = make_scope_exit([&]() { @@ -733,10 +744,18 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { // Lex '#'. const dependency_directives_scan::Token &HashTok = lexToken(First, End); + if (HashTok.is(tok::hashhash)) { + // A \p tok::hashhash at this location is passed by the preprocessor to the + // parser to interpret, like any other token. So for dependency scanning + // skip it like a normal token not affecting the preprocessor. + skipLine(First, End); + assert(First <= End); + return false; + } assert(HashTok.is(tok::hash)); (void)HashTok; - Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); + std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); if (!FoundId) return false; @@ -803,6 +822,9 @@ bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { if (!Error) { // Add an EOF on success. 
+ if (LastTokenPtr && + (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) + pushDirective(tokens_present_before_eof); pushDirective(pp_eof); } @@ -851,7 +873,9 @@ void clang::printDependencyDirectivesAsSource( }; for (const dependency_directives_scan::Directive &Directive : Directives) { - Optional<tok::TokenKind> PrevTokenKind; + if (Directive.Kind == tokens_present_before_eof) + OS << "<TokBeforeEOF>"; + std::optional<tok::TokenKind> PrevTokenKind; for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) OS << ' '; diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp index 0001fc348eda..bb50a4eef65c 100644 --- a/clang/lib/Lex/HeaderMap.cpp +++ b/clang/lib/Lex/HeaderMap.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include <cstring> #include <memory> +#include <optional> using namespace clang; /// HashHMapKey - This is the 'well known' hash function required by the file @@ -145,13 +146,13 @@ HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const { return Result; } -Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const { +std::optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const { // Add the start of the string table to the idx. StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); // Check for invalid index. if (StrTabIdx >= FileBuffer->getBufferSize()) - return None; + return std::nullopt; const char *Data = FileBuffer->getBufferStart() + StrTabIdx; unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx; @@ -159,7 +160,7 @@ Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const { // Check whether the buffer is null-terminated. 
if (Len == MaxLen && Data[Len - 1]) - return None; + return std::nullopt; return StringRef(Data, Len); } @@ -177,7 +178,7 @@ LLVM_DUMP_METHOD void HeaderMapImpl::dump() const { << ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n"; auto getStringOrInvalid = [this](unsigned Id) -> StringRef { - if (Optional<StringRef> S = getString(Id)) + if (std::optional<StringRef> S = getString(Id)) return *S; return "<invalid>"; }; @@ -208,7 +209,7 @@ StringRef HeaderMapImpl::lookupFilename(StringRef Filename, if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss. // See if the key matches. If not, probe on. - Optional<StringRef> Key = getString(B.Key); + std::optional<StringRef> Key = getString(B.Key); if (LLVM_UNLIKELY(!Key)) continue; if (!Filename.equals_insensitive(*Key)) @@ -216,8 +217,8 @@ StringRef HeaderMapImpl::lookupFilename(StringRef Filename, // If so, we have a match in the hash table. Construct the destination // path. - Optional<StringRef> Prefix = getString(B.Prefix); - Optional<StringRef> Suffix = getString(B.Suffix); + std::optional<StringRef> Prefix = getString(B.Prefix); + std::optional<StringRef> Suffix = getString(B.Suffix); DestPath.clear(); if (LLVM_LIKELY(Prefix && Suffix)) { @@ -240,9 +241,9 @@ StringRef HeaderMapImpl::reverseLookupFilename(StringRef DestPath) const { if (B.Key == HMAP_EmptyBucketKey) continue; - Optional<StringRef> Key = getString(B.Key); - Optional<StringRef> Prefix = getString(B.Prefix); - Optional<StringRef> Suffix = getString(B.Suffix); + std::optional<StringRef> Key = getString(B.Key); + std::optional<StringRef> Prefix = getString(B.Prefix); + std::optional<StringRef> Suffix = getString(B.Suffix); if (LLVM_LIKELY(Key && Prefix && Suffix)) { SmallVector<char, 1024> Buf; Buf.append(Prefix->begin(), Prefix->end()); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 60fd42bc1127..074c147ba3c5 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -116,6 
+116,7 @@ void HeaderSearch::SetSearchPaths( NoCurDirSearch = noCurDirSearch; SearchDirToHSEntry = std::move(searchDirToHSEntry); //LookupFileCache.clear(); + indexInitialHeaderMaps(); } void HeaderSearch::AddSearchPath(const DirectoryLookup &dir, bool isAngled) { @@ -170,11 +171,11 @@ void HeaderSearch::getHeaderMapFileNames( } std::string HeaderSearch::getCachedModuleFileName(Module *Module) { - const FileEntry *ModuleMap = + OptionalFileEntryRef ModuleMap = getModuleMap().getModuleMapFileForUniquing(Module); // The ModuleMap maybe a nullptr, when we load a cached C++ module without // *.modulemap file. In this case, just return an empty string. - if (ModuleMap == nullptr) + if (!ModuleMap) return {}; return getCachedModuleFileName(Module->Name, ModuleMap->getName()); } @@ -211,7 +212,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, } std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { - const FileEntry *ModuleMap = + OptionalFileEntryRef ModuleMap = getModuleMap().getModuleMapFileForUniquing(Module); StringRef ModuleName = Module->Name; StringRef ModuleMapPath = ModuleMap->getName(); @@ -255,18 +256,11 @@ std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName, // // To avoid false-negatives, we form as canonical a path as we can, and map // to lower-case in case we're on a case-insensitive file system. 
- std::string Parent = - std::string(llvm::sys::path::parent_path(ModuleMapPath)); - if (Parent.empty()) - Parent = "."; - auto Dir = FileMgr.getDirectory(Parent); - if (!Dir) + SmallString<128> CanonicalPath(ModuleMapPath); + if (getModuleMap().canonicalizeModuleMapPath(CanonicalPath)) return {}; - auto DirName = FileMgr.getCanonicalName(*Dir); - auto FileName = llvm::sys::path::filename(ModuleMapPath); - llvm::hash_code Hash = - llvm::hash_combine(DirName.lower(), FileName.lower()); + llvm::hash_code Hash = llvm::hash_combine(CanonicalPath.str().lower()); SmallString<128> HashStr; llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); @@ -311,7 +305,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, // Look through the various header search paths to load any available module // maps, searching for a module map that describes this module. - for (DirectoryLookup Dir : search_dir_range()) { + for (DirectoryLookup &Dir : search_dir_range()) { if (Dir.isFramework()) { // Search for or infer a module map for a framework. Here we use // SearchName rather than ModuleName, to permit finding private modules @@ -335,7 +329,8 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, continue; bool IsSystem = Dir.isSystemHeaderDirectory(); - // Only returns None if not a normal directory, which we just checked + // Only returns std::nullopt if not a normal directory, which we just + // checked DirectoryEntryRef NormalDir = *Dir.getDirRef(); // Search for a module map file in this directory. if (loadModuleMapFile(NormalDir, IsSystem, @@ -379,6 +374,31 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, return Module; } +void HeaderSearch::indexInitialHeaderMaps() { + llvm::StringMap<unsigned, llvm::BumpPtrAllocator> Index(SearchDirs.size()); + + // Iterate over all filename keys and associate them with the index i. 
+ unsigned i = 0; + for (; i != SearchDirs.size(); ++i) { + auto &Dir = SearchDirs[i]; + + // We're concerned with only the initial contiguous run of header + // maps within SearchDirs, which can be 99% of SearchDirs when + // SearchDirs.size() is ~10000. + if (!Dir.isHeaderMap()) + break; + + // Give earlier keys precedence over identical later keys. + auto Callback = [&](StringRef Filename) { + Index.try_emplace(Filename.lower(), i); + }; + Dir.getHeaderMap()->forEachKey(Callback); + } + + SearchDirHeaderMapIndex = std::move(Index); + FirstNonHeaderMapSearchDirIdx = i; +} + //===----------------------------------------------------------------------===// // File lookup within a DirectoryLookup scope //===----------------------------------------------------------------------===// @@ -395,13 +415,14 @@ StringRef DirectoryLookup::getName() const { return getHeaderMap()->getFileName(); } -Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule( +OptionalFileEntryRef HeaderSearch::getFileAndSuggestModule( StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir, bool IsSystemHeaderDir, Module *RequestingModule, - ModuleMap::KnownHeader *SuggestedModule) { + ModuleMap::KnownHeader *SuggestedModule, bool OpenFile /*=true*/, + bool CacheFailures /*=true*/) { // If we have a module map that might map this header, load it and // check whether we'll have a suggestion for a module. - auto File = getFileMgr().getFileRef(FileName, /*OpenFile=*/true); + auto File = getFileMgr().getFileRef(FileName, OpenFile, CacheFailures); if (!File) { // For rare, surprising errors (e.g. "out of file handles"), diag the EC // message. @@ -412,26 +433,27 @@ Optional<FileEntryRef> HeaderSearch::getFileAndSuggestModule( Diags.Report(IncludeLoc, diag::err_cannot_open_file) << FileName << EC.message(); } - return None; + return std::nullopt; } // If there is a module that corresponds to this header, suggest it. if (!findUsableModuleForHeader( &File->getFileEntry(), Dir ? 
Dir : File->getFileEntry().getDir(), RequestingModule, SuggestedModule, IsSystemHeaderDir)) - return None; + return std::nullopt; return *File; } /// LookupFile - Lookup the specified file in this search path, returning it /// if it exists or returning null if not. -Optional<FileEntryRef> DirectoryLookup::LookupFile( +OptionalFileEntryRef DirectoryLookup::LookupFile( StringRef &Filename, HeaderSearch &HS, SourceLocation IncludeLoc, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound, - bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName) const { + bool &IsInHeaderMap, SmallVectorImpl<char> &MappedName, + bool OpenFile) const { InUserSpecifiedSystemFramework = false; IsInHeaderMap = false; MappedName.clear(); @@ -451,9 +473,9 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile( RelativePath->append(Filename.begin(), Filename.end()); } - return HS.getFileAndSuggestModule(TmpDir, IncludeLoc, getDir(), - isSystemHeaderDirectory(), - RequestingModule, SuggestedModule); + return HS.getFileAndSuggestModule( + TmpDir, IncludeLoc, getDir(), isSystemHeaderDirectory(), + RequestingModule, SuggestedModule, OpenFile); } if (isFramework()) @@ -466,7 +488,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile( SmallString<1024> Path; StringRef Dest = HM->lookupFilename(Filename, Path); if (Dest.empty()) - return None; + return std::nullopt; IsInHeaderMap = true; @@ -491,7 +513,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile( Dest = HM->lookupFilename(Filename, Path); } - if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) { + if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest, OpenFile)) { FixupSearchPath(); return *Res; } @@ -501,7 +523,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile( // function as part of the regular logic that applies to include search paths. 
// The case where the target file **does not exist** is handled here: HS.noteLookupUsage(HS.searchDirIdx(*this), IncludeLoc); - return None; + return std::nullopt; } /// Given a framework directory, find the top-most framework directory. @@ -510,7 +532,7 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile( /// \param DirName The name of the framework directory. /// \param SubmodulePath Will be populated with the submodule path from the /// returned top-level module to the originally named framework. -static Optional<DirectoryEntryRef> +static OptionalDirectoryEntryRef getTopFrameworkDir(FileManager &FileMgr, StringRef DirName, SmallVectorImpl<std::string> &SubmodulePath) { assert(llvm::sys::path::extension(DirName) == ".framework" && @@ -564,7 +586,7 @@ static bool needModuleLookup(Module *RequestingModule, /// DoFrameworkLookup - Do a lookup of the specified file in the current /// DirectoryLookup, which is a framework directory. -Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup( +OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( StringRef Filename, HeaderSearch &HS, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, @@ -574,7 +596,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup( // Framework names must have a '/' in the filename. size_t SlashPos = Filename.find('/'); if (SlashPos == StringRef::npos) - return None; + return std::nullopt; // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answers are yes/no and unknown. @@ -583,7 +605,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup( // If it is known and in some other directory, fail. if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDirRef()) - return None; + return std::nullopt; // Otherwise, construct the path to this framework dir. 
@@ -607,7 +629,7 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup( // If the framework dir doesn't exist, we fail. auto Dir = FileMgr.getDirectory(FrameworkName); if (!Dir) - return None; + return std::nullopt; // Otherwise, if it does, remember that this is the right direntry for this // framework. @@ -690,17 +712,17 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup( if (!HS.findUsableModuleForFrameworkHeader( &File->getFileEntry(), FrameworkPath, RequestingModule, SuggestedModule, IsSystem)) - return None; + return std::nullopt; } else { if (!HS.findUsableModuleForHeader(&File->getFileEntry(), getDir(), RequestingModule, SuggestedModule, IsSystem)) - return None; + return std::nullopt; } } if (File) return *File; - return None; + return std::nullopt; } void HeaderSearch::cacheLookupSuccess(LookupFileCacheInfo &CacheLookup, @@ -833,14 +855,14 @@ diagnoseFrameworkInclude(DiagnosticsEngine &Diags, SourceLocation IncludeLoc, /// for system \#include's or not (i.e. using <> instead of ""). Includers, if /// non-empty, indicates where the \#including file(s) are, in case a relative /// search is needed. Microsoft mode will pass all \#including files. -Optional<FileEntryRef> HeaderSearch::LookupFile( +OptionalFileEntryRef HeaderSearch::LookupFile( StringRef Filename, SourceLocation IncludeLoc, bool isAngled, ConstSearchDirIterator FromDir, ConstSearchDirIterator *CurDirArg, ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache, - bool BuildSystemModule) { + bool BuildSystemModule, bool OpenFile, bool CacheFailures) { ConstSearchDirIterator CurDirLocal = nullptr; ConstSearchDirIterator &CurDir = CurDirArg ? 
*CurDirArg : CurDirLocal; @@ -859,7 +881,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( // If this was an #include_next "/absolute/file", fail. if (FromDir) - return None; + return std::nullopt; if (SearchPath) SearchPath->clear(); @@ -869,13 +891,14 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( } // Otherwise, just return the file. return getFileAndSuggestModule(Filename, IncludeLoc, nullptr, - /*IsSystemHeaderDir*/false, - RequestingModule, SuggestedModule); + /*IsSystemHeaderDir*/ false, + RequestingModule, SuggestedModule, OpenFile, + CacheFailures); } // This is the header that MSVC's header search would have found. ModuleMap::KnownHeader MSSuggestedModule; - Optional<FileEntryRef> MSFE; + OptionalFileEntryRef MSFE; // Unless disabled, check to see if the file is in the #includer's // directory. This cannot be based on CurDir, because each includer could be @@ -904,7 +927,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( bool IncluderIsSystemHeader = Includer ? getFileInfo(Includer).DirInfo != SrcMgr::C_User : BuildSystemModule; - if (Optional<FileEntryRef> FE = getFileAndSuggestModule( + if (OptionalFileEntryRef FE = getFileAndSuggestModule( TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader, RequestingModule, SuggestedModule)) { if (!Includer) { @@ -981,24 +1004,37 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( ConstSearchDirIterator NextIt = std::next(It); - // If the entry has been previously looked up, the first value will be - // non-zero. If the value is equal to i (the start point of our search), then - // this is a matching hit. - if (!SkipCache && CacheLookup.StartIt == NextIt) { - // Skip querying potentially lots of directories for this lookup. 
- if (CacheLookup.HitIt) - It = CacheLookup.HitIt; - if (CacheLookup.MappedName) { - Filename = CacheLookup.MappedName; - if (IsMapped) - *IsMapped = true; + if (!SkipCache) { + if (CacheLookup.StartIt == NextIt) { + // HIT: Skip querying potentially lots of directories for this lookup. + if (CacheLookup.HitIt) + It = CacheLookup.HitIt; + if (CacheLookup.MappedName) { + Filename = CacheLookup.MappedName; + if (IsMapped) + *IsMapped = true; + } + } else { + // MISS: This is the first query, or the previous query didn't match + // our search start. We will fill in our found location below, so prime + // the start point value. + CacheLookup.reset(/*NewStartIt=*/NextIt); + + if (It == search_dir_begin() && FirstNonHeaderMapSearchDirIdx > 0) { + // Handle cold misses of user includes in the presence of many header + // maps. We avoid searching perhaps thousands of header maps by + // jumping directly to the correct one or jumping beyond all of them. + auto Iter = SearchDirHeaderMapIndex.find(Filename.lower()); + if (Iter == SearchDirHeaderMapIndex.end()) + // Not in index => Skip to first SearchDir after initial header maps + It = search_dir_nth(FirstNonHeaderMapSearchDirIdx); + else + // In index => Start with a specific header map + It = search_dir_nth(Iter->second); + } } - } else { - // Otherwise, this is the first query, or the previous query didn't match - // our search start. We will fill in our found location below, so prime the - // start point value. 
+ } else CacheLookup.reset(/*NewStartIt=*/NextIt); - } SmallString<64> MappedName; @@ -1007,10 +1043,10 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( bool InUserSpecifiedSystemFramework = false; bool IsInHeaderMap = false; bool IsFrameworkFoundInDir = false; - Optional<FileEntryRef> File = It->LookupFile( + OptionalFileEntryRef File = It->LookupFile( Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule, SuggestedModule, InUserSpecifiedSystemFramework, IsFrameworkFoundInDir, - IsInHeaderMap, MappedName); + IsInHeaderMap, MappedName, OpenFile); if (!MappedName.empty()) { assert(IsInHeaderMap && "MappedName should come from a header map"); CacheLookup.MappedName = @@ -1102,7 +1138,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( ScratchFilename += '/'; ScratchFilename += Filename; - Optional<FileEntryRef> File = LookupFile( + OptionalFileEntryRef File = LookupFile( ScratchFilename, IncludeLoc, /*isAngled=*/true, FromDir, &CurDir, Includers.front(), SearchPath, RelativePath, RequestingModule, SuggestedModule, IsMapped, /*IsFrameworkFound=*/nullptr); @@ -1131,7 +1167,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( // Otherwise, didn't find it. Remember we didn't find this. CacheLookup.HitIt = search_dir_end(); - return None; + return std::nullopt; } /// LookupSubframeworkHeader - Look up a subframework for the specified @@ -1139,7 +1175,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile( /// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox /// is a subframework within Carbon.framework. If so, return the FileEntry /// for the designated file, otherwise return null. 
-Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( +OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader( StringRef Filename, const FileEntry *ContextFileEnt, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) { @@ -1149,7 +1185,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( // FIXME: Should we permit '\' on Windows? size_t SlashPos = Filename.find('/'); if (SlashPos == StringRef::npos) - return None; + return std::nullopt; // Look up the base framework name of the ContextFileEnt. StringRef ContextName = ContextFileEnt->getName(); @@ -1160,7 +1196,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( if (FrameworkPos == StringRef::npos || (ContextName[FrameworkPos + DotFrameworkLen] != '/' && ContextName[FrameworkPos + DotFrameworkLen] != '\\')) - return None; + return std::nullopt; SmallString<1024> FrameworkName(ContextName.data(), ContextName.data() + FrameworkPos + @@ -1180,7 +1216,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( CacheLookup.first().size() == FrameworkName.size() && memcmp(CacheLookup.first().data(), &FrameworkName[0], CacheLookup.first().size()) != 0) - return None; + return std::nullopt; // Cache subframework. if (!CacheLookup.second.Directory) { @@ -1189,7 +1225,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( // If the framework dir doesn't exist, we fail. auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName); if (!Dir) - return None; + return std::nullopt; // Otherwise, if it does, remember that this is the right direntry for this // framework. @@ -1227,7 +1263,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true); if (!File) - return None; + return std::nullopt; } // This file is a system header or C++ unfriendly if the old file is. 
@@ -1242,7 +1278,7 @@ Optional<FileEntryRef> HeaderSearch::LookupSubframeworkHeader( if (!findUsableModuleForFrameworkHeader(&File->getFileEntry(), FrameworkName, RequestingModule, SuggestedModule, /*IsSystem*/ false)) - return None; + return std::nullopt; return *File; } @@ -1348,7 +1384,7 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) { void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE, ModuleMap::ModuleHeaderRole Role, bool isCompilingModuleHeader) { - bool isModularHeader = !(Role & ModuleMap::TextualHeader); + bool isModularHeader = ModuleMap::isModular(Role); // Don't mark the file info as non-external if there's nothing to change. if (!isCompilingModuleHeader) { @@ -1519,14 +1555,14 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, } ModuleMap::KnownHeader -HeaderSearch::findModuleForHeader(const FileEntry *File, - bool AllowTextual) const { +HeaderSearch::findModuleForHeader(const FileEntry *File, bool AllowTextual, + bool AllowExcluded) const { if (ExternalSource) { // Make sure the external source has handled header info about this file, // which includes whether the file is part of a module. (void)getExistingFileInfo(File); } - return ModMap.findModuleForHeader(File, AllowTextual); + return ModMap.findModuleForHeader(File, AllowTextual, AllowExcluded); } ArrayRef<ModuleMap::KnownHeader> @@ -1560,6 +1596,8 @@ static bool suggestModule(HeaderSearch &HS, const FileEntry *File, *SuggestedModule = ModuleMap::KnownHeader(); return true; } + // TODO: Add this module (or just its module map file) into something like + // `RequestingModule->AffectingClangModules`. return false; } } @@ -1590,7 +1628,7 @@ bool HeaderSearch::findUsableModuleForFrameworkHeader( if (needModuleLookup(RequestingModule, SuggestedModule)) { // Find the top-level framework based on this framework. 
SmallVector<std::string, 4> SubmodulePath; - Optional<DirectoryEntryRef> TopFrameworkDir = + OptionalDirectoryEntryRef TopFrameworkDir = ::getTopFrameworkDir(FileMgr, FrameworkName, SubmodulePath); assert(TopFrameworkDir && "Could not find the top-most framework dir"); @@ -1630,7 +1668,7 @@ bool HeaderSearch::loadModuleMapFile(const FileEntry *File, bool IsSystem, StringRef OriginalModuleMapFile) { // Find the directory for the module. For frameworks, that may require going // up from the 'Modules' directory. - Optional<DirectoryEntryRef> Dir; + OptionalDirectoryEntryRef Dir; if (getHeaderSearchOpts().ModuleMapFileHomeIsCwd) { Dir = FileMgr.getOptionalDirectoryRef("."); } else { @@ -1891,32 +1929,28 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics( llvm::StringRef File, llvm::StringRef WorkingDir, llvm::StringRef MainFile, bool *IsSystem) { using namespace llvm::sys; + + llvm::SmallString<32> FilePath = File; + // remove_dots switches to backslashes on windows as a side-effect! + // We always want to suggest forward slashes for includes. + // (not remove_dots(..., posix) as that misparses windows paths). + path::remove_dots(FilePath, /*remove_dot_dot=*/true); + path::native(FilePath, path::Style::posix); + File = FilePath; unsigned BestPrefixLength = 0; // Checks whether `Dir` is a strict path prefix of `File`. If so and that's // the longest prefix we've seen so for it, returns true and updates the // `BestPrefixLength` accordingly. 
- auto CheckDir = [&](llvm::StringRef Dir) -> bool { - llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); + auto CheckDir = [&](llvm::SmallString<32> Dir) -> bool { if (!WorkingDir.empty() && !path::is_absolute(Dir)) - fs::make_absolute(WorkingDir, DirPath); - path::remove_dots(DirPath, /*remove_dot_dot=*/true); - Dir = DirPath; + fs::make_absolute(WorkingDir, Dir); + path::remove_dots(Dir, /*remove_dot_dot=*/true); for (auto NI = path::begin(File), NE = path::end(File), DI = path::begin(Dir), DE = path::end(Dir); - /*termination condition in loop*/; ++NI, ++DI) { - // '.' components in File are ignored. - while (NI != NE && *NI == ".") - ++NI; - if (NI == NE) - break; - - // '.' components in Dir are ignored. - while (DI != DE && *DI == ".") - ++DI; + NI != NE; ++NI, ++DI) { if (DI == DE) { - // Dir is a prefix of File, up to '.' components and choice of path - // separators. + // Dir is a prefix of File, up to choice of path separators. unsigned PrefixLength = NI - path::begin(File); if (PrefixLength > BestPrefixLength) { BestPrefixLength = PrefixLength; diff --git a/clang/lib/Lex/InitHeaderSearch.cpp b/clang/lib/Lex/InitHeaderSearch.cpp index 158b5667151f..d4465565718e 100644 --- a/clang/lib/Lex/InitHeaderSearch.cpp +++ b/clang/lib/Lex/InitHeaderSearch.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include <optional> using namespace clang; using namespace clang::frontend; @@ -35,16 +36,16 @@ namespace { struct DirectoryLookupInfo { IncludeDirGroup Group; DirectoryLookup Lookup; - Optional<unsigned> UserEntryIdx; + std::optional<unsigned> UserEntryIdx; DirectoryLookupInfo(IncludeDirGroup Group, DirectoryLookup Lookup, - Optional<unsigned> UserEntryIdx) + std::optional<unsigned> UserEntryIdx) : Group(Group), Lookup(Lookup), UserEntryIdx(UserEntryIdx) {} }; -/// InitHeaderSearch - This class makes it easier to set the search paths of -/// a HeaderSearch object. 
InitHeaderSearch stores several search path lists -/// internally, which can be sent to a HeaderSearch object in one swoop. +/// This class makes it easier to set the search paths of a HeaderSearch object. +/// InitHeaderSearch stores several search path lists internally, which can be +/// sent to a HeaderSearch object in one swoop. class InitHeaderSearch { std::vector<DirectoryLookupInfo> IncludePath; std::vector<std::pair<std::string, bool> > SystemHeaderPrefixes; @@ -58,56 +59,54 @@ public: : Headers(HS), Verbose(verbose), IncludeSysroot(std::string(sysroot)), HasSysroot(!(sysroot.empty() || sysroot == "/")) {} - /// AddPath - Add the specified path to the specified group list, prefixing - /// the sysroot if used. + /// Add the specified path to the specified group list, prefixing the sysroot + /// if used. /// Returns true if the path exists, false if it was ignored. bool AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional<unsigned> UserEntryIdx = None); + std::optional<unsigned> UserEntryIdx = std::nullopt); - /// AddUnmappedPath - Add the specified path to the specified group list, - /// without performing any sysroot remapping. + /// Add the specified path to the specified group list, without performing any + /// sysroot remapping. /// Returns true if the path exists, false if it was ignored. bool AddUnmappedPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional<unsigned> UserEntryIdx = None); + std::optional<unsigned> UserEntryIdx = std::nullopt); - /// AddSystemHeaderPrefix - Add the specified prefix to the system header - /// prefix list. + /// Add the specified prefix to the system header prefix list. void AddSystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader) { SystemHeaderPrefixes.emplace_back(std::string(Prefix), IsSystemHeader); } - /// AddGnuCPlusPlusIncludePaths - Add the necessary paths to support a gnu - /// libstdc++. + /// Add the necessary paths to support a gnu libstdc++. 
/// Returns true if the \p Base path was found, false if it does not exist. bool AddGnuCPlusPlusIncludePaths(StringRef Base, StringRef ArchDir, StringRef Dir32, StringRef Dir64, const llvm::Triple &triple); - /// AddMinGWCPlusPlusIncludePaths - Add the necessary paths to support a MinGW - /// libstdc++. + /// Add the necessary paths to support a MinGW libstdc++. void AddMinGWCPlusPlusIncludePaths(StringRef Base, StringRef Arch, StringRef Version); - // AddDefaultCIncludePaths - Add paths that should always be searched. + /// Add paths that should always be searched. void AddDefaultCIncludePaths(const llvm::Triple &triple, const HeaderSearchOptions &HSOpts); - // AddDefaultCPlusPlusIncludePaths - Add paths that should be searched when - // compiling c++. + /// Add paths that should be searched when compiling c++. void AddDefaultCPlusPlusIncludePaths(const LangOptions &LangOpts, const llvm::Triple &triple, const HeaderSearchOptions &HSOpts); - /// AddDefaultSystemIncludePaths - Adds the default system include paths so - /// that e.g. stdio.h is found. + /// Returns true iff AddDefaultIncludePaths should do anything. If this + /// returns false, include paths should instead be handled in the driver. + bool ShouldAddDefaultIncludePaths(const llvm::Triple &triple); + + /// Adds the default system include paths so that e.g. stdio.h is found. void AddDefaultIncludePaths(const LangOptions &Lang, const llvm::Triple &triple, const HeaderSearchOptions &HSOpts); - /// Realize - Merges all search path lists into one list and send it to - /// HeaderSearch. + /// Merges all search path lists into one list and send it to HeaderSearch. 
void Realize(const LangOptions &Lang); }; @@ -123,7 +122,7 @@ static bool CanPrefixSysroot(StringRef Path) { bool InitHeaderSearch::AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional<unsigned> UserEntryIdx) { + std::optional<unsigned> UserEntryIdx) { // Add the path with sysroot prepended, if desired and this is a system header // group. if (HasSysroot) { @@ -140,7 +139,7 @@ bool InitHeaderSearch::AddPath(const Twine &Path, IncludeDirGroup Group, bool InitHeaderSearch::AddUnmappedPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional<unsigned> UserEntryIdx) { + std::optional<unsigned> UserEntryIdx) { assert(!Path.isTriviallyEmpty() && "can't handle empty path here"); FileManager &FM = Headers.getFileMgr(); @@ -225,28 +224,23 @@ void InitHeaderSearch::AddMinGWCPlusPlusIncludePaths(StringRef Base, void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, const HeaderSearchOptions &HSOpts) { - llvm::Triple::OSType os = triple.getOS(); - - if (triple.isOSDarwin()) { + if (!ShouldAddDefaultIncludePaths(triple)) llvm_unreachable("Include management is handled in the driver."); - } + + llvm::Triple::OSType os = triple.getOS(); if (HSOpts.UseStandardSystemIncludes) { switch (os) { case llvm::Triple::CloudABI: - case llvm::Triple::FreeBSD: - case llvm::Triple::NetBSD: - case llvm::Triple::OpenBSD: case llvm::Triple::NaCl: case llvm::Triple::PS4: case llvm::Triple::PS5: case llvm::Triple::ELFIAMCU: - case llvm::Triple::Fuchsia: break; case llvm::Triple::Win32: if (triple.getEnvironment() != llvm::Triple::Cygnus) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; default: // FIXME: temporary hack: hard-coded paths. 
AddPath("/usr/local/include", System, false); @@ -280,12 +274,6 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, } switch (os) { - case llvm::Triple::Linux: - case llvm::Triple::Hurd: - case llvm::Triple::Solaris: - case llvm::Triple::OpenBSD: - llvm_unreachable("Include management is handled in the driver."); - case llvm::Triple::CloudABI: { // <sysroot>/<triple>/include SmallString<128> P = StringRef(HSOpts.ResourceDir); @@ -350,7 +338,6 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, case llvm::Triple::RTEMS: case llvm::Triple::NaCl: case llvm::Triple::ELFIAMCU: - case llvm::Triple::Fuchsia: break; case llvm::Triple::PS4: case llvm::Triple::PS5: { @@ -386,20 +373,12 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, void InitHeaderSearch::AddDefaultCPlusPlusIncludePaths( const LangOptions &LangOpts, const llvm::Triple &triple, const HeaderSearchOptions &HSOpts) { - llvm::Triple::OSType os = triple.getOS(); - // FIXME: temporary hack: hard-coded paths. - - if (triple.isOSDarwin()) { + if (!ShouldAddDefaultIncludePaths(triple)) llvm_unreachable("Include management is handled in the driver."); - } + // FIXME: temporary hack: hard-coded paths. + llvm::Triple::OSType os = triple.getOS(); switch (os) { - case llvm::Triple::Linux: - case llvm::Triple::Hurd: - case llvm::Triple::Solaris: - case llvm::Triple::AIX: - llvm_unreachable("Include management is handled in the driver."); - break; case llvm::Triple::Win32: switch (triple.getEnvironment()) { default: llvm_unreachable("Include management is handled in the driver."); @@ -425,44 +404,61 @@ void InitHeaderSearch::AddDefaultCPlusPlusIncludePaths( } } -void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang, - const llvm::Triple &triple, - const HeaderSearchOptions &HSOpts) { - // NB: This code path is going away. 
All of the logic is moving into the - // driver which has the information necessary to do target-specific - // selections of default include paths. Each target which moves there will be - // exempted from this logic here until we can delete the entire pile of code. +bool InitHeaderSearch::ShouldAddDefaultIncludePaths( + const llvm::Triple &triple) { switch (triple.getOS()) { - default: - break; // Everything else continues to use this routine's logic. - + case llvm::Triple::AIX: case llvm::Triple::Emscripten: - case llvm::Triple::Linux: - case llvm::Triple::Hurd: + case llvm::Triple::FreeBSD: + case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: + case llvm::Triple::Fuchsia: + case llvm::Triple::Hurd: + case llvm::Triple::Linux: case llvm::Triple::Solaris: case llvm::Triple::WASI: - case llvm::Triple::AIX: - return; + return false; case llvm::Triple::Win32: if (triple.getEnvironment() != llvm::Triple::Cygnus || triple.isOSBinFormatMachO()) - return; + return false; break; case llvm::Triple::UnknownOS: if (triple.isWasm()) - return; + return false; + break; + + default: break; } - // All header search logic is handled in the Driver for Darwin. + return true; // Everything else uses AddDefaultIncludePaths(). +} + +void InitHeaderSearch::AddDefaultIncludePaths( + const LangOptions &Lang, const llvm::Triple &triple, + const HeaderSearchOptions &HSOpts) { + // NB: This code path is going away. All of the logic is moving into the + // driver which has the information necessary to do target-specific + // selections of default include paths. Each target which moves there will be + // exempted from this logic in ShouldAddDefaultIncludePaths() until we can + // delete the entire pile of code. + if (!ShouldAddDefaultIncludePaths(triple)) + return; + + // NOTE: some additional header search logic is handled in the driver for + // Darwin. if (triple.isOSDarwin()) { if (HSOpts.UseStandardSystemIncludes) { // Add the default framework include paths on Darwin. 
- AddPath("/System/Library/Frameworks", System, true); - AddPath("/Library/Frameworks", System, true); + if (triple.isDriverKit()) { + AddPath("/System/DriverKit/System/Library/Frameworks", System, true); + } else { + AddPath("/System/Library/Frameworks", System, true); + AddPath("/Library/Frameworks", System, true); + } } return; } @@ -479,9 +475,9 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang, AddDefaultCIncludePaths(triple, HSOpts); } -/// RemoveDuplicates - If there are duplicate directory entries in the specified -/// search list, remove the later (dead) ones. Returns the number of non-system -/// headers removed, which is used to update NumAngled. +/// If there are duplicate directory entries in the specified search list, +/// remove the later (dead) ones. Returns the number of non-system headers +/// removed, which is used to update NumAngled. static unsigned RemoveDuplicates(std::vector<DirectoryLookupInfo> &SearchList, unsigned First, bool Verbose) { llvm::SmallPtrSet<const DirectoryEntry *, 8> SeenDirs; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index a4cff403e739..d49d9e9e4b14 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -26,8 +26,6 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -44,6 +42,7 @@ #include <cstddef> #include <cstdint> #include <cstring> +#include <optional> #include <string> #include <tuple> #include <utility> @@ -1048,9 +1047,11 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics( while (SM.isMacroArgExpansion(Loc)) Loc = SM.getImmediateExpansionRange(Loc).getBegin(); - // If the macro's spelling has no FileID, then it's actually a token paste - // or stringization (or similar) and not a macro at all. 
- if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc)))) + // If the macro's spelling isn't FileID or from scratch space, then it's + // actually a token paste or stringization (or similar) and not a macro at + // all. + SourceLocation SpellLoc = SM.getSpellingLoc(Loc); + if (!SpellLoc.isFileID() || SM.isWrittenInScratchSpace(SpellLoc)) return {}; // Find the spelling location of the start of the non-argument expansion @@ -1195,15 +1196,16 @@ static char GetTrigraphCharForLetter(char Letter) { /// whether trigraphs are enabled or not. static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs) { char Res = GetTrigraphCharForLetter(*CP); - if (!Res || !L) return Res; + if (!Res) + return Res; if (!Trigraphs) { - if (!L->isLexingRawMode()) + if (L && !L->isLexingRawMode()) L->Diag(CP-2, diag::trigraph_ignored); return 0; } - if (!L->isLexingRawMode()) + if (L && !L->isLexingRawMode()) L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); return Res; } @@ -1256,12 +1258,12 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } -Optional<Token> Lexer::findNextToken(SourceLocation Loc, - const SourceManager &SM, - const LangOptions &LangOpts) { +std::optional<Token> Lexer::findNextToken(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { if (Loc.isMacroID()) { if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) - return None; + return std::nullopt; } Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); @@ -1272,7 +1274,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc, bool InvalidTemp = false; StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) - return None; + return std::nullopt; const char *TokenBegin = File.data() + LocInfo.second; @@ -1292,7 +1294,7 @@ Optional<Token> Lexer::findNextToken(SourceLocation Loc, SourceLocation Lexer::findLocationAfterToken( SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions 
&LangOpts, bool SkipTrailingWhitespaceAndNewLine) { - Optional<Token> Tok = findNextToken(Loc, SM, LangOpts); + std::optional<Token> Tok = findNextToken(Loc, SM, LangOpts); if (!Tok || Tok->isNot(TKind)) return {}; SourceLocation TokenLoc = Tok->getLocation(); @@ -1457,7 +1459,35 @@ static bool isUnicodeWhitespace(uint32_t Codepoint) { return UnicodeWhitespaceChars.contains(Codepoint); } -static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { +static llvm::SmallString<5> codepointAsHexString(uint32_t C) { + llvm::SmallString<5> CharBuf; + llvm::raw_svector_ostream CharOS(CharBuf); + llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); + return CharBuf; +} + +// To mitigate https://github.com/llvm/llvm-project/issues/54732, +// we allow "Mathematical Notation Characters" in identifiers. +// This is a proposed profile that extends the XID_Start/XID_continue +// with mathematical symbols, superscipts and subscripts digits +// found in some production software. +// https://www.unicode.org/L2/L2022/22230-math-profile.pdf +static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, + bool IsStart, bool &IsExtension) { + static const llvm::sys::UnicodeCharSet MathStartChars( + MathematicalNotationProfileIDStartRanges); + static const llvm::sys::UnicodeCharSet MathContinueChars( + MathematicalNotationProfileIDContinueRanges); + if (MathStartChars.contains(C) || + (!IsStart && MathContinueChars.contains(C))) { + IsExtension = true; + return true; + } + return false; +} + +static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, + bool &IsExtension) { if (LangOpts.AsmPreprocessor) { return false; } else if (LangOpts.DollarIdents && '$' == C) { @@ -1469,8 +1499,10 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { // '_' doesn't have the XID_Continue property but is allowed in C and C++. 
static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges); - return C == '_' || XIDStartChars.contains(C) || - XIDContinueChars.contains(C); + if (C == '_' || XIDStartChars.contains(C) || XIDContinueChars.contains(C)) + return true; + return isMathematicalExtensionID(C, LangOpts, /*IsStart=*/false, + IsExtension); } else if (LangOpts.C11) { static const llvm::sys::UnicodeCharSet C11AllowedIDChars( C11AllowedIDCharRanges); @@ -1482,16 +1514,21 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { } } -static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { +static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, + bool &IsExtension) { + assert(C > 0x7F && "isAllowedInitiallyIDChar called with an ASCII codepoint"); + IsExtension = false; if (LangOpts.AsmPreprocessor) { return false; } if (LangOpts.CPlusPlus || LangOpts.C2x) { static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); - // '_' doesn't have the XID_Start property but is allowed in C++. 
- return C == '_' || XIDStartChars.contains(C); + if (XIDStartChars.contains(C)) + return true; + return isMathematicalExtensionID(C, LangOpts, /*IsStart=*/true, + IsExtension); } - if (!isAllowedIDChar(C, LangOpts)) + if (!isAllowedIDChar(C, LangOpts, IsExtension)) return false; if (LangOpts.C11) { static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( @@ -1503,6 +1540,22 @@ static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { return !C99DisallowedInitialIDChars.contains(C); } +static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, + CharSourceRange Range) { + + static const llvm::sys::UnicodeCharSet MathStartChars( + MathematicalNotationProfileIDStartRanges); + static const llvm::sys::UnicodeCharSet MathContinueChars( + MathematicalNotationProfileIDContinueRanges); + + (void)MathStartChars; + (void)MathContinueChars; + assert((MathStartChars.contains(C) || MathContinueChars.contains(C)) && + "Unexpected mathematical notation codepoint"); + Diags.Report(Range.getBegin(), diag::ext_mathematical_notation) + << codepointAsHexString(C) << Range; +} + static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End) { return CharSourceRange::getCharRange(L.getSourceLocation(Begin), @@ -1602,18 +1655,13 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, std::lower_bound(std::begin(SortedHomoglyphs), std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'}); if (Homoglyph->Character == C) { - llvm::SmallString<5> CharBuf; - { - llvm::raw_svector_ostream CharOS(CharBuf); - llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); - } if (Homoglyph->LooksLike) { const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) - << Range << CharBuf << LooksLikeStr; + << Range << codepointAsHexString(C) << LooksLikeStr; } else { Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width) - << Range 
<< CharBuf; + << Range << codepointAsHexString(C); } } } @@ -1624,25 +1672,24 @@ static void diagnoseInvalidUnicodeCodepointInIdentifier( if (isASCII(CodePoint)) return; - bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts); - bool IsIDContinue = IsIDStart || isAllowedIDChar(CodePoint, LangOpts); + bool IsExtension; + bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts, IsExtension); + bool IsIDContinue = + IsIDStart || isAllowedIDChar(CodePoint, LangOpts, IsExtension); if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue)) return; bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue; - llvm::SmallString<5> CharBuf; - llvm::raw_svector_ostream CharOS(CharBuf); - llvm::write_hex(CharOS, CodePoint, llvm::HexPrintStyle::Upper, 4); - if (!IsFirst || InvalidOnlyAtStart) { Diags.Report(Range.getBegin(), diag::err_character_not_allowed_identifier) - << Range << CharBuf << int(InvalidOnlyAtStart) + << Range << codepointAsHexString(CodePoint) << int(InvalidOnlyAtStart) << FixItHint::CreateRemoval(Range); } else { Diags.Report(Range.getBegin(), diag::err_character_not_allowed) - << Range << CharBuf << FixItHint::CreateRemoval(Range); + << Range << codepointAsHexString(CodePoint) + << FixItHint::CreateRemoval(Range); } } @@ -1653,8 +1700,8 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, if (CodePoint == 0) { return false; } - - if (!isAllowedIDChar(CodePoint, LangOpts)) { + bool IsExtension = false; + if (!isAllowedIDChar(CodePoint, LangOpts, IsExtension)) { if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint)) return false; if (!isLexingRawMode() && !ParsingPreprocessorDirective && @@ -1667,10 +1714,15 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, // We got a unicode codepoint that is neither a space nor a // a valid identifier part. // Carry on as if the codepoint was valid for recovery purposes. 
- } else if (!isLexingRawMode()) + } else if (!isLexingRawMode()) { + if (IsExtension) + diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UCNPtr)); + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UCNPtr), /*IsFirst=*/false); + } Result.setFlag(Token::HasUCN); if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || @@ -1693,7 +1745,9 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { if (Result != llvm::conversionOK) return false; - if (!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) { + bool IsExtension = false; + if (!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts, + IsExtension)) { if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint)) return false; @@ -1706,6 +1760,9 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { // a valid identifier part. Carry on as if the codepoint was // valid for recovery purposes. } else if (!isLexingRawMode()) { + if (IsExtension) + diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UnicodePtr)); maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UnicodePtr), /*IsFirst=*/false); @@ -1719,9 +1776,13 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr) { - if (isAllowedInitiallyIDChar(C, LangOpts)) { + bool IsExtension = false; + if (isAllowedInitiallyIDChar(C, LangOpts, IsExtension)) { if (!isLexingRawMode() && !ParsingPreprocessorDirective && !PP->isPreprocessedOutput()) { + if (IsExtension) + diagnoseExtensionInIdentifier(PP->getDiagnostics(), C, + makeCharRange(*this, BufferPtr, CurPtr)); maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, makeCharRange(*this, BufferPtr, CurPtr), /*IsFirst=*/true); @@ -1735,7 +1796,7 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C, if (!isLexingRawMode() && 
!ParsingPreprocessorDirective && !PP->isPreprocessedOutput() && !isASCII(*BufferPtr) && - !isAllowedInitiallyIDChar(C, LangOpts) && !isUnicodeWhitespace(C)) { + !isUnicodeWhitespace(C)) { // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just drop the character. @@ -2905,7 +2966,7 @@ void Lexer::ReadToEndOfLine(SmallVectorImpl<char> *Result) { break; } // FALL THROUGH. - LLVM_FALLTHROUGH; + [[fallthrough]]; case '\r': case '\n': // Okay, we found the end of the line. First, back up past the \0, \r, \n. @@ -3195,9 +3256,9 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { return false; } -llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, - const char *SlashLoc, - Token *Result) { +std::optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, + const char *SlashLoc, + Token *Result) { unsigned CharSize; char Kind = getCharAndSize(StartPtr, CharSize); assert((Kind == 'u' || Kind == 'U') && "expected a UCN"); @@ -3216,7 +3277,7 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, if (!LangOpts.CPlusPlus && !LangOpts.C99) { if (Diagnose) Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89); - return llvm::None; + return std::nullopt; } const char *CurPtr = StartPtr + CharSize; @@ -3225,7 +3286,7 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, uint32_t CodePoint = 0; while (Count != NumHexDigits || Delimited) { char C = getCharAndSize(CurPtr, CharSize); - if (!Delimited && C == '{') { + if (!Delimited && Count == 0 && C == '{') { Delimited = true; CurPtr += CharSize; continue; @@ -3242,15 +3303,15 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, if (!Delimited) break; if (Diagnose) - Diag(BufferPtr, diag::warn_delimited_ucn_incomplete) + Diag(SlashLoc, diag::warn_delimited_ucn_incomplete) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } if 
(CodePoint & 0xF000'0000) { if (Diagnose) Diag(KindLoc, diag::err_escape_too_large) << 0; - return llvm::None; + return std::nullopt; } CodePoint <<= 4; @@ -3261,21 +3322,21 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, if (Count == 0) { if (Diagnose) - Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty + Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty : diag::warn_ucn_escape_no_digits) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } if (Delimited && Kind == 'U') { if (Diagnose) - Diag(StartPtr, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1); - return llvm::None; + Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1); + return std::nullopt; } if (!Delimited && Count != NumHexDigits) { if (Diagnose) { - Diag(BufferPtr, diag::warn_ucn_escape_incomplete); + Diag(SlashLoc, diag::warn_ucn_escape_incomplete); // If the user wrote \U1234, suggest a fixit to \u. if (Count == 4 && NumHexDigits == 8) { CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1); @@ -3283,19 +3344,22 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, << FixItHint::CreateReplacement(URange, "u"); } } - return llvm::None; + return std::nullopt; } if (Delimited && PP) { - Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b - ? diag::warn_cxx2b_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) + Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0); } if (Result) { Result->setFlag(Token::HasUCN); - if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0))) + // If the UCN contains either a trigraph or a line splicing, + // we need to call getAndAdvanceChar again to set the appropriate flags + // on Result. + if (CurPtr - StartPtr == (ptrdiff_t)(Count + 1 + (Delimited ? 
2 : 0))) StartPtr = CurPtr; else while (StartPtr != CurPtr) @@ -3306,8 +3370,9 @@ llvm::Optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, return CodePoint; } -llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, - Token *Result) { +std::optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, + const char *SlashLoc, + Token *Result) { unsigned CharSize; bool Diagnose = Result && !isLexingRawMode(); @@ -3320,8 +3385,8 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, C = getCharAndSize(CurPtr, CharSize); if (C != '{') { if (Diagnose) - Diag(StartPtr, diag::warn_ucn_escape_incomplete); - return llvm::None; + Diag(SlashLoc, diag::warn_ucn_escape_incomplete); + return std::nullopt; } CurPtr += CharSize; const char *StartName = CurPtr; @@ -3335,28 +3400,29 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, break; } - if (!isAlphanumeric(C) && C != '_' && C != '-' && C != ' ') + if (isVerticalWhitespace(C)) break; Buffer.push_back(C); } if (!FoundEndDelimiter || Buffer.empty()) { if (Diagnose) - Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty + Diag(SlashLoc, FoundEndDelimiter ? 
diag::warn_delimited_ucn_empty : diag::warn_delimited_ucn_incomplete) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } StringRef Name(Buffer.data(), Buffer.size()); - llvm::Optional<char32_t> Res = + std::optional<char32_t> Match = llvm::sys::unicode::nameToCodepointStrict(Name); - llvm::Optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch; - if (!Res) { - if (!isLexingRawMode()) { - Diag(StartPtr, diag::err_invalid_ucn_name) - << StringRef(Buffer.data(), Buffer.size()); - LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name); + std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch; + if (!Match) { + LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name); + if (Diagnose) { + Diag(StartName, diag::err_invalid_ucn_name) + << StringRef(Buffer.data(), Buffer.size()) + << makeCharRange(*this, StartName, CurPtr - CharSize); if (LooseMatch) { Diag(StartName, diag::note_invalid_ucn_name_loose_matching) << FixItHint::CreateReplacement( @@ -3364,27 +3430,30 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, LooseMatch->Name); } } - // When finding a match using Unicode loose matching rules - // recover after having emitted a diagnostic. - if (!LooseMatch) - return llvm::None; - // We do not offer missspelled character names suggestions here + // We do not offer misspelled character names suggestions here // as the set of what would be a valid suggestion depends on context, // and we should not make invalid suggestions. } - if (Diagnose && PP && !LooseMatch) - Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b - ? diag::warn_cxx2b_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) + if (Diagnose && Match) + Diag(SlashLoc, PP->getLangOpts().CPlusPlus2b + ? diag::warn_cxx2b_delimited_escape_sequence + : diag::ext_delimited_escape_sequence) << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 
1 : 0); - if (LooseMatch) - Res = LooseMatch->CodePoint; + // If no diagnostic has been emitted yet, likely because we are doing a + // tentative lexing, we do not want to recover here to make sure the token + // will not be incorrectly considered valid. This function will be called + // again and a diagnostic emitted then. + if (LooseMatch && Diagnose) + Match = LooseMatch->CodePoint; if (Result) { Result->setFlag(Token::HasUCN); - if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 4)) + // If the UCN contains either a trigraph or a line splicing, + // we need to call getAndAdvanceChar again to set the appropriate flags + // on Result. + if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 3)) StartPtr = CurPtr; else while (StartPtr != CurPtr) @@ -3392,19 +3461,19 @@ llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, } else { StartPtr = CurPtr; } - return *Res; + return Match ? std::optional<uint32_t>(*Match) : std::nullopt; } uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result) { unsigned CharSize; - llvm::Optional<uint32_t> CodePointOpt; + std::optional<uint32_t> CodePointOpt; char Kind = getCharAndSize(StartPtr, CharSize); if (Kind == 'u' || Kind == 'U') CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc, Result); else if (Kind == 'N') - CodePointOpt = tryReadNamedUCN(StartPtr, Result); + CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc, Result); if (!CodePointOpt) return 0; @@ -3516,10 +3585,9 @@ bool Lexer::Lex(Token &Result) { /// token, not a normal token, as such, it is an internal interface. It assumes /// that the Flags of result have been cleared before calling this. bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { -LexNextToken: - // New token, can't need cleaning yet. 
- Result.clearFlag(Token::NeedsCleaning); - Result.setIdentifierInfo(nullptr); +LexStart: + assert(!Result.needsCleaning() && "Result needs cleaning"); + assert(!Result.hasPtrData() && "Result has not been reset"); // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; @@ -3591,7 +3659,7 @@ LexNextToken: case '\r': if (CurPtr[0] == '\n') (void)getAndAdvanceChar(CurPtr, Result); - LLVM_FALLTHROUGH; + [[fallthrough]]; case '\n': // If we are inside a preprocessor directive and we see the end of line, // we know we are done with the directive, so return an EOD token. @@ -3788,7 +3856,7 @@ LexNextToken: return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result), tok::wide_char_constant); // FALL THROUGH, treating L like the start of an identifier. - LLVM_FALLTHROUGH; + [[fallthrough]]; // C99 6.4.2: Identifiers. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': @@ -4301,6 +4369,10 @@ HandleDirective: // We parsed the directive; lex a token with the new state. return false; + +LexNextToken: + Result.clearFlag(Token::NeedsCleaning); + goto LexStart; } const char *Lexer::convertDependencyDirectiveToken( @@ -4323,6 +4395,8 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) { if (DepDirectives.front().Kind == pp_eof) return LexEndOfFile(Result, BufferEnd); + if (DepDirectives.front().Kind == tokens_present_before_eof) + MIOpt.ReadToken(); NextDepDirectiveTokenIndex = 0; DepDirectives = DepDirectives.drop_front(); } @@ -4334,6 +4408,22 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { MIOpt.ReadToken(); } + if (ParsingFilename && DDTok.is(tok::less)) { + BufferPtr = BufferStart + DDTok.Offset; + LexAngledStringLiteral(Result, BufferPtr + 1); + if (Result.isNot(tok::header_name)) + return true; + // Advance the index of lexed tokens. 
+ while (true) { + const dependency_directives_scan::Token &NextTok = + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex]; + if (BufferStart + NextTok.Offset >= BufferPtr) + break; + ++NextDepDirectiveTokenIndex; + } + return true; + } + const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result); if (Result.is(tok::hash) && Result.isAtStartOfLine()) { @@ -4398,6 +4488,7 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) { case cxx_import_decl: case cxx_export_module_decl: case cxx_export_import_decl: + case tokens_present_before_eof: break; case pp_if: case pp_ifdef: diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 53635a7385ec..421a85336043 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -358,7 +358,7 @@ void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) { ++I; auto Delim = std::find(I, Input.end(), '}'); assert(Delim != Input.end()); - llvm::Optional<llvm::sys::unicode::LooseMatchingResult> Res = + std::optional<llvm::sys::unicode::LooseMatchingResult> Res = llvm::sys::unicode::nameToCodepointLooseMatching( StringRef(I, std::distance(I, Delim))); assert(Res); @@ -487,7 +487,7 @@ static void DiagnoseInvalidUnicodeCharacterName( namespace u = llvm::sys::unicode; - llvm::Optional<u::LooseMatchingResult> Res = + std::optional<u::LooseMatchingResult> Res = u::nameToCodepointLooseMatching(Name); if (Res) { Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd, @@ -515,8 +515,9 @@ static void DiagnoseInvalidUnicodeCharacterName( std::string Str; llvm::UTF32 V = Match.Value; - LLVM_ATTRIBUTE_UNUSED bool Converted = + bool Converted = llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str); + (void)Converted; assert(Converted && "Found a match wich is not a unicode character"); Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd, @@ -545,15 +546,13 @@ static bool ProcessNamedUCNEscape(const char 
*ThisTokBegin, diag::err_delimited_escape_missing_brace) << StringRef(&ThisTokBuf[-1], 1); } - ThisTokBuf++; return false; } ThisTokBuf++; - const char *ClosingBrace = - std::find_if_not(ThisTokBuf, ThisTokEnd, [](char C) { - return llvm::isAlnum(C) || llvm::isSpace(C) || C == '_' || C == '-'; - }); - bool Incomplete = ClosingBrace == ThisTokEnd || *ClosingBrace != '}'; + const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) { + return C == '}' || isVerticalWhitespace(C); + }); + bool Incomplete = ClosingBrace == ThisTokEnd; bool Empty = ClosingBrace == ThisTokBuf; if (Incomplete || Empty) { if (Diags) { @@ -567,8 +566,7 @@ static bool ProcessNamedUCNEscape(const char *ThisTokBegin, } StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf); ThisTokBuf = ClosingBrace + 1; - llvm::Optional<char32_t> Res = - llvm::sys::unicode::nameToCodepointStrict(Name); + std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name); if (!Res) { if (Diags) DiagnoseInvalidUnicodeCharacterName(Diags, Features, Loc, ThisTokBegin, @@ -766,13 +764,13 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, switch (bytesToWrite) { // note: everything falls through. case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; - LLVM_FALLTHROUGH; + [[fallthrough]]; case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; - LLVM_FALLTHROUGH; + [[fallthrough]]; case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6; - LLVM_FALLTHROUGH; + [[fallthrough]]; case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]); } @@ -945,9 +943,13 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // CUDA host and device may have different _Float16 support, therefore // allows f16 literals to avoid false alarm. + // When we compile for OpenMP target offloading on NVPTX, f16 suffix + // should also be supported. // ToDo: more precise check for CUDA. 
- if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd && - s[1] == '1' && s[2] == '6') { + // TODO: AMDGPU might also support it in the future. + if ((Target.hasFloat16Type() || LangOpts.CUDA || + (LangOpts.OpenMPIsDevice && Target.getTriple().isNVPTX())) && + s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { s += 2; // success, eat up 2 characters. isFloat16 = true; continue; @@ -1037,7 +1039,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, break; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; case 'j': case 'J': if (isImaginary) break; // Cannot be repeated. diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp index 7ede00b4aa64..c54f69bb9ead 100644 --- a/clang/lib/Lex/MacroArgs.cpp +++ b/clang/lib/Lex/MacroArgs.cpp @@ -62,7 +62,7 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, // Copy the actual unexpanded tokens to immediately after the result ptr. if (!UnexpArgTokens.empty()) { - static_assert(std::is_trivial<Token>::value, + static_assert(std::is_trivial_v<Token>, "assume trivial copyability if copying into the " "uninitialized array (as opposed to reusing a cached " "MacroArgs)"); @@ -94,7 +94,7 @@ MacroArgs *MacroArgs::deallocate() { // Run the dtor to deallocate the vectors. this->~MacroArgs(); // Release the memory for the object. - static_assert(std::is_trivially_destructible<Token>::value, + static_assert(std::is_trivially_destructible_v<Token>, "assume trivially destructible and forego destructors"); free(this); @@ -169,7 +169,7 @@ const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg, std::vector<Token> &Result = PreExpArgTokens[Arg]; if (!Result.empty()) return Result; - SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); + SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); const Token *AT = getUnexpArgument(Arg); unsigned NumToks = getArgLength(AT)+1; // Include the EOF. 
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp index eae12beb6244..39bb0f44eff2 100644 --- a/clang/lib/Lex/MacroInfo.cpp +++ b/clang/lib/Lex/MacroInfo.cpp @@ -18,12 +18,12 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <optional> #include <utility> using namespace clang; @@ -34,11 +34,11 @@ namespace { // and 4 byte SourceLocation. template <int> class MacroInfoSizeChecker { public: - constexpr static bool AsExpected = true; + [[maybe_unused]] constexpr static bool AsExpected = true; }; template <> class MacroInfoSizeChecker<8> { public: - constexpr static bool AsExpected = + [[maybe_unused]] constexpr static bool AsExpected = sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2); }; @@ -118,7 +118,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, if (A.getKind() != B.getKind()) return false; - // If this isn't the first first token, check that the whitespace and + // If this isn't the first token, check that the whitespace and // start-of-line characteristics match. 
if (i != 0 && (A.isAtStartOfLine() != B.isAtStartOfLine() || @@ -198,7 +198,7 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const { MacroDirective::DefInfo MacroDirective::getDefinition() { MacroDirective *MD = this; SourceLocation UndefLoc; - Optional<bool> isPublic; + std::optional<bool> isPublic; for (; MD; MD = MD->getPrevious()) { if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD)) return DefInfo(DefMD, UndefLoc, !isPublic || *isPublic); @@ -213,7 +213,7 @@ MacroDirective::DefInfo MacroDirective::getDefinition() { isPublic = VisMD->isPublic(); } - return DefInfo(nullptr, UndefLoc, !isPublic || isPublic.value()); + return DefInfo(nullptr, UndefLoc, !isPublic || *isPublic); } const MacroDirective::DefInfo diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 47d6f5893e97..ee2cca4e0814 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -28,7 +28,6 @@ #include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Token.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -47,6 +46,7 @@ #include <cassert> #include <cstdint> #include <cstring> +#include <optional> #include <string> #include <system_error> #include <utility> @@ -75,7 +75,6 @@ void ModuleMap::addLinkAsDependency(Module *Mod) { Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) { switch ((int)Role) { - default: llvm_unreachable("unknown header role"); case NormalHeader: return Module::HK_Normal; case PrivateHeader: @@ -84,7 +83,10 @@ Module::HeaderKind ModuleMap::headerRoleToKind(ModuleHeaderRole Role) { return Module::HK_Textual; case PrivateHeader | TextualHeader: return Module::HK_PrivateTextual; + case ExcludedHeader: + return Module::HK_Excluded; } + llvm_unreachable("unknown header role"); } ModuleMap::ModuleHeaderRole @@ -99,11 +101,15 @@ ModuleMap::headerKindToRole(Module::HeaderKind Kind) { case 
Module::HK_PrivateTextual: return ModuleHeaderRole(PrivateHeader | TextualHeader); case Module::HK_Excluded: - llvm_unreachable("unexpected header kind"); + return ExcludedHeader; } llvm_unreachable("unknown header kind"); } +bool ModuleMap::isModular(ModuleHeaderRole Role) { + return !(Role & (ModuleMap::TextualHeader | ModuleMap::ExcludedHeader)); +} + Module::ExportDecl ModuleMap::resolveExport(Module *Mod, const Module::UnresolvedExportDecl &Unresolved, @@ -171,23 +177,23 @@ static void appendSubframeworkPaths(Module *Mod, llvm::sys::path::append(Path, "Frameworks", Framework + ".framework"); } -Optional<FileEntryRef> ModuleMap::findHeader( +OptionalFileEntryRef ModuleMap::findHeader( Module *M, const Module::UnresolvedHeaderDirective &Header, SmallVectorImpl<char> &RelativePathName, bool &NeedsFramework) { // Search for the header file within the module's home directory. auto *Directory = M->Directory; SmallString<128> FullPathName(Directory->getName()); - auto GetFile = [&](StringRef Filename) -> Optional<FileEntryRef> { + auto GetFile = [&](StringRef Filename) -> OptionalFileEntryRef { auto File = expectedToOptional(SourceMgr.getFileManager().getFileRef(Filename)); if (!File || (Header.Size && File->getSize() != *Header.Size) || (Header.ModTime && File->getModificationTime() != *Header.ModTime)) - return None; + return std::nullopt; return *File; }; - auto GetFrameworkFile = [&]() -> Optional<FileEntryRef> { + auto GetFrameworkFile = [&]() -> OptionalFileEntryRef { unsigned FullPathLength = FullPathName.size(); appendSubframeworkPaths(M, RelativePathName); unsigned RelativePathLength = RelativePathName.size(); @@ -241,7 +247,7 @@ Optional<FileEntryRef> ModuleMap::findHeader( << Header.FileName << M->getFullModuleName(); NeedsFramework = true; } - return None; + return std::nullopt; } return NormalHdrFile; @@ -251,7 +257,7 @@ void ModuleMap::resolveHeader(Module *Mod, const Module::UnresolvedHeaderDirective &Header, bool &NeedsFramework) { SmallString<128> 
RelativePathName; - if (Optional<FileEntryRef> File = + if (OptionalFileEntryRef File = findHeader(Mod, Header, RelativePathName, NeedsFramework)) { if (Header.IsUmbrella) { const DirectoryEntry *UmbrellaDir = &File->getDir().getDirEntry(); @@ -264,10 +270,7 @@ void ModuleMap::resolveHeader(Module *Mod, } else { Module::Header H = {Header.FileName, std::string(RelativePathName.str()), *File}; - if (Header.Kind == Module::HK_Excluded) - excludeHeader(Mod, H); - else - addHeader(Mod, H, headerKindToRole(Header.Kind)); + addHeader(Mod, H, headerKindToRole(Header.Kind)); } } else if (Header.HasBuiltinHeader && !Header.Size && !Header.ModTime) { // There's a builtin header but no corresponding on-disk header. Assume @@ -301,7 +304,7 @@ bool ModuleMap::resolveAsBuiltinHeader( // supplied by Clang. Find that builtin header. SmallString<128> Path; llvm::sys::path::append(Path, BuiltinIncludeDir->getName(), Header.FileName); - auto File = SourceMgr.getFileManager().getFile(Path); + auto File = SourceMgr.getFileManager().getOptionalFileRef(Path); if (!File) return false; @@ -479,7 +482,7 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule, if (RequestingModule) { resolveUses(RequestingModule, /*Complain=*/false); - resolveHeaderDirectives(RequestingModule, /*File=*/llvm::None); + resolveHeaderDirectives(RequestingModule, /*File=*/std::nullopt); } bool Excluded = false; @@ -489,6 +492,12 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule, HeadersMap::iterator Known = findKnownHeader(File); if (Known != Headers.end()) { for (const KnownHeader &Header : Known->second) { + // Excluded headers don't really belong to a module. + if (Header.getRole() == ModuleMap::ExcludedHeader) { + Excluded = true; + continue; + } + // Remember private headers for later printing of a diagnostic. 
if (violatesPrivateInclude(RequestingModule, File, Header)) { Private = Header.getModule(); @@ -562,12 +571,18 @@ static bool isBetterKnownHeader(const ModuleMap::KnownHeader &New, (Old.getRole() & ModuleMap::TextualHeader)) return !(New.getRole() & ModuleMap::TextualHeader); + // Prefer a non-excluded header over an excluded header. + if ((New.getRole() == ModuleMap::ExcludedHeader) != + (Old.getRole() == ModuleMap::ExcludedHeader)) + return New.getRole() != ModuleMap::ExcludedHeader; + // Don't have a reason to choose between these. Just keep the first one. return false; } ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File, - bool AllowTextual) { + bool AllowTextual, + bool AllowExcluded) { auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader { if (!AllowTextual && R.getRole() & ModuleMap::TextualHeader) return {}; @@ -579,6 +594,9 @@ ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File, ModuleMap::KnownHeader Result; // Iterate over all modules that 'File' is part of to find the best fit. for (KnownHeader &H : Known->second) { + // Cannot use a module if the header is excluded in it. + if (!AllowExcluded && H.getRole() == ModuleMap::ExcludedHeader) + continue; // Prefer a header from the source module over all others. 
if (H.getModule()->getTopLevelModule() == SourceModule) return MakeResult(H); @@ -607,7 +625,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) { UmbrellaModule = UmbrellaModule->Parent; if (UmbrellaModule->InferSubmodules) { - const FileEntry *UmbrellaModuleMap = + OptionalFileEntryRefDegradesToFileEntryPtr UmbrellaModuleMap = getModuleMapFileForUniquing(UmbrellaModule); // Infer submodules for each of the directories we found between @@ -672,7 +690,7 @@ ModuleMap::findAllModulesForHeader(const FileEntry *File) { if (findOrCreateModuleForHeaderInUmbrellaDir(File)) return Headers.find(File)->second; - return None; + return std::nullopt; } ArrayRef<ModuleMap::KnownHeader> @@ -681,7 +699,7 @@ ModuleMap::findResolvedModulesForHeader(const FileEntry *File) const { resolveHeaderDirectives(File); auto It = Headers.find(File); if (It == Headers.end()) - return None; + return std::nullopt; return It->second; } @@ -700,6 +718,9 @@ ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header, E = Known->second.end(); I != E; ++I) { + if (I->getRole() == ModuleMap::ExcludedHeader) + continue; + if (I->isAvailable() && (!RequestingModule || I->getModule()->isSubModuleOf(RequestingModule))) { @@ -852,8 +873,7 @@ ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent, } Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, - StringRef Name, - Module *GlobalModule) { + StringRef Name) { assert(LangOpts.CurrentModule == Name && "module name mismatch"); assert(!Modules[Name] && "redefining existing module"); @@ -879,29 +899,6 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, return Result; } -Module *ModuleMap::createHeaderModule(StringRef Name, - ArrayRef<Module::Header> Headers) { - assert(LangOpts.CurrentModule == Name && "module name mismatch"); - assert(!Modules[Name] && "redefining existing module"); - - auto *Result = - new Module(Name, SourceLocation(), nullptr, /*IsFramework*/ false, - 
/*IsExplicit*/ false, NumCreatedModules++); - Result->Kind = Module::ModuleInterfaceUnit; - Modules[Name] = SourceModule = Result; - - for (const Module::Header &H : Headers) { - auto *M = new Module(H.NameAsWritten, SourceLocation(), Result, - /*IsFramework*/ false, - /*IsExplicit*/ true, NumCreatedModules++); - // Header modules are implicitly 'export *'. - M->Exports.push_back(Module::ExportDecl(nullptr, true)); - addHeader(M, H, NormalHeader); - } - - return Result; -} - Module *ModuleMap::createHeaderUnit(SourceLocation Loc, StringRef Name, Module::Header H) { assert(LangOpts.CurrentModule == Name && "module name mismatch"); @@ -1018,14 +1015,16 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir, // If we're not allowed to infer a framework module, don't. if (!canInfer) return nullptr; - } else - ModuleMapFile = getModuleMapFileForUniquing(Parent); - + } else { + OptionalFileEntryRefDegradesToFileEntryPtr ModuleMapRef = + getModuleMapFileForUniquing(Parent); + ModuleMapFile = ModuleMapRef; + } // Look for an umbrella header. SmallString<128> UmbrellaName = StringRef(FrameworkDir->getName()); llvm::sys::path::append(UmbrellaName, "Headers", ModuleName + ".h"); - auto UmbrellaHeader = FileMgr.getFile(UmbrellaName); + auto UmbrellaHeader = FileMgr.getOptionalFileRef(UmbrellaName); // FIXME: If there's no umbrella header, we could probably scan the // framework to load *everything*. 
But, it's not clear that this is a good @@ -1137,14 +1136,14 @@ Module *ModuleMap::createShadowedModule(StringRef Name, bool IsFramework, } void ModuleMap::setUmbrellaHeader( - Module *Mod, const FileEntry *UmbrellaHeader, const Twine &NameAsWritten, + Module *Mod, FileEntryRef UmbrellaHeader, const Twine &NameAsWritten, const Twine &PathRelativeToRootModuleDirectory) { Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader)); - Mod->Umbrella = UmbrellaHeader; + Mod->Umbrella = &UmbrellaHeader.getMapEntry(); Mod->UmbrellaAsWritten = NameAsWritten.str(); Mod->UmbrellaRelativeToRootModuleDirectory = PathRelativeToRootModuleDirectory.str(); - UmbrellaDirs[UmbrellaHeader->getDir()] = Mod; + UmbrellaDirs[UmbrellaHeader.getDir()] = Mod; // Notify callbacks that we just added a new header. for (const auto &Cb : Callbacks) @@ -1214,11 +1213,11 @@ void ModuleMap::resolveHeaderDirectives(const FileEntry *File) const { } void ModuleMap::resolveHeaderDirectives( - Module *Mod, llvm::Optional<const FileEntry *> File) const { + Module *Mod, std::optional<const FileEntry *> File) const { bool NeedsFramework = false; SmallVector<Module::UnresolvedHeaderDirective, 1> NewHeaders; - const auto Size = File ? File.value()->getSize() : 0; - const auto ModTime = File ? File.value()->getModificationTime() : 0; + const auto Size = File ? (*File)->getSize() : 0; + const auto ModTime = File ? (*File)->getModificationTime() : 0; for (auto &Header : Mod->UnresolvedHeaders) { if (File && ((Header.ModTime && Header.ModTime != ModTime) || @@ -1260,29 +1259,21 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header, Cb->moduleMapAddHeader(Header.Entry->getName()); } -void ModuleMap::excludeHeader(Module *Mod, Module::Header Header) { - // Add this as a known header so we won't implicitly add it to any - // umbrella directory module. - // FIXME: Should we only exclude it from umbrella modules within the - // specified module? 
- (void) Headers[Header.Entry]; - - Mod->Headers[Module::HK_Excluded].push_back(std::move(Header)); -} - -const FileEntry * +OptionalFileEntryRef ModuleMap::getContainingModuleMapFile(const Module *Module) const { if (Module->DefinitionLoc.isInvalid()) - return nullptr; + return std::nullopt; - return SourceMgr.getFileEntryForID( - SourceMgr.getFileID(Module->DefinitionLoc)); + return SourceMgr.getFileEntryRefForID( + SourceMgr.getFileID(Module->DefinitionLoc)); } -const FileEntry *ModuleMap::getModuleMapFileForUniquing(const Module *M) const { +OptionalFileEntryRef +ModuleMap::getModuleMapFileForUniquing(const Module *M) const { if (M->IsInferred) { assert(InferredModuleAllowedBy.count(M) && "missing inferred module map"); - return InferredModuleAllowedBy.find(M)->second; + // FIXME: Update InferredModuleAllowedBy to use FileEntryRef. + return InferredModuleAllowedBy.find(M)->second->getLastRef(); } return getContainingModuleMapFile(M); } @@ -1292,6 +1283,49 @@ void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) { InferredModuleAllowedBy[M] = ModMap; } +std::error_code +ModuleMap::canonicalizeModuleMapPath(SmallVectorImpl<char> &Path) { + StringRef Dir = llvm::sys::path::parent_path({Path.data(), Path.size()}); + + // Do not canonicalize within the framework; the module map parser expects + // Modules/ not Versions/A/Modules. + if (llvm::sys::path::filename(Dir) == "Modules") { + StringRef Parent = llvm::sys::path::parent_path(Dir); + if (Parent.endswith(".framework")) + Dir = Parent; + } + + FileManager &FM = SourceMgr.getFileManager(); + auto DirEntry = FM.getDirectory(Dir.empty() ? "." : Dir); + if (!DirEntry) + return DirEntry.getError(); + + // Canonicalize the directory. + StringRef CanonicalDir = FM.getCanonicalName(*DirEntry); + if (CanonicalDir != Dir) { + auto CanonicalDirEntry = FM.getDirectory(CanonicalDir); + // Only use the canonicalized path if it resolves to the same entry as the + // original. 
This is not true if there's a VFS overlay on top of a FS where + // the directory is a symlink. The overlay would not remap the target path + // of the symlink to the same directory entry in that case. + if (CanonicalDirEntry && *CanonicalDirEntry == *DirEntry) { + bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir); + (void)Done; + assert(Done && "Path should always start with Dir"); + } + } + + // In theory, the filename component should also be canonicalized if it + // on a case-insensitive filesystem. However, the extra canonicalization is + // expensive and if clang looked up the filename it will always be lowercase. + + // Remove ., remove redundant separators, and switch to native separators. + // This is needed for separators between CanonicalDir and the filename. + llvm::sys::path::remove_dots(Path); + + return std::error_code(); +} + void ModuleMap::addAdditionalModuleMapFile(const Module *M, const FileEntry *ModuleMap) { AdditionalModMaps[M].insert(ModuleMap); @@ -1668,7 +1702,7 @@ retry: break; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: Diags.Report(Tok.getLocation(), diag::err_mmap_unknown_token); @@ -2026,8 +2060,7 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule->IsSystem = true; if (Attrs.IsExternC) ActiveModule->IsExternC = true; - if (Attrs.NoUndeclaredIncludes || - (!ActiveModule->Parent && ModuleName == "Darwin")) + if (Attrs.NoUndeclaredIncludes) ActiveModule->NoUndeclaredIncludes = true; ActiveModule->Directory = Directory; @@ -2300,6 +2333,7 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, SourceLocation LeadingLoc) { // We've already consumed the first token. ModuleMap::ModuleHeaderRole Role = ModuleMap::NormalHeader; + if (LeadingToken == MMToken::PrivateKeyword) { Role = ModuleMap::PrivateHeader; // 'private' may optionally be followed by 'textual'. 
@@ -2307,6 +2341,8 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, LeadingToken = Tok.Kind; consumeToken(); } + } else if (LeadingToken == MMToken::ExcludeKeyword) { + Role = ModuleMap::ExcludedHeader; } if (LeadingToken == MMToken::TextualKeyword) @@ -2340,9 +2376,7 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, Header.FileName = std::string(Tok.getString()); Header.FileNameLoc = consumeToken(); Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword; - Header.Kind = - (LeadingToken == MMToken::ExcludeKeyword ? Module::HK_Excluded - : Map.headerRoleToKind(Role)); + Header.Kind = Map.headerRoleToKind(Role); // Check whether we already have an umbrella. if (Header.IsUmbrella && ActiveModule->Umbrella) { @@ -2476,8 +2510,8 @@ void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) { SourceMgr.getFileManager().getVirtualFileSystem(); for (llvm::vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E; I != E && !EC; I.increment(EC)) { - if (auto FE = SourceMgr.getFileManager().getFile(I->path())) { - Module::Header Header = {"", std::string(I->path()), *FE}; + if (auto FE = SourceMgr.getFileManager().getOptionalFileRef(I->path())) { + Module::Header Header = {"", std::string(I->path()), FE}; Headers.push_back(std::move(Header)); } } @@ -3033,7 +3067,7 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem, } assert(Target && "Missing target information"); - llvm::Optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getBufferOrNone(ID); + std::optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getBufferOrNone(ID); if (!Buffer) return ParsedModuleMap[File] = true; assert((!Offset || *Offset <= Buffer->getBufferSize()) && diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp index b618071590ba..f2b60a728e90 100644 --- a/clang/lib/Lex/PPCallbacks.cpp +++ b/clang/lib/Lex/PPCallbacks.cpp @@ -15,16 +15,15 @@ using namespace clang; PPCallbacks::~PPCallbacks() 
= default; void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, Optional<FileEntryRef> File, + bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) {} // Out of line key method. PPChainedCallbacks::~PPChainedCallbacks() = default; void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, Optional<FileEntryRef> File, + bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) { First->HasInclude(Loc, FileName, IsAngled, File, FileType); Second->HasInclude(Loc, FileName, IsAngled, File, FileType); } - diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 9a8fd4391b41..6ae513dea878 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -47,6 +47,7 @@ #include <cassert> #include <cstring> #include <new> +#include <optional> #include <string> #include <utility> @@ -57,9 +58,8 @@ using namespace clang; //===----------------------------------------------------------------------===// MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { - auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead}; - MIChainHead = MIChain; - return &MIChain->MI; + static_assert(std::is_trivially_destructible_v<MacroInfo>, ""); + return new (BP) MacroInfo(L); } DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, @@ -109,25 +109,6 @@ enum PPElifDiag { PED_Elifndef }; -// The -fmodule-name option tells the compiler to textually include headers in -// the specified module, meaning clang won't build the specified module. This is -// useful in a number of situations, for instance, when building a library that -// vends a module map, one might want to avoid hitting intermediate build -// products containimg the module map or avoid finding the system installed -// modulemap for that library. 
-static bool isForModuleBuilding(Module *M, StringRef CurrentModule, - StringRef ModuleName) { - StringRef TopLevelName = M->getTopLevelModuleName(); - - // When building framework Foo, we wanna make sure that Foo *and* Foo_Private - // are textually included and no modules are built for both. - if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName && - !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private")) - TopLevelName = TopLevelName.drop_back(8); - - return TopLevelName == CurrentModule; -} - static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { const LangOptions &Lang = PP.getLangOpts(); if (isReservedInAllContexts(II->isReserved(Lang))) { @@ -274,9 +255,9 @@ static bool warnByDefaultOnWrongCase(StringRef Include) { /// \param Candidates the candidates to find a similar string. /// /// \returns a similar string if exists. If no similar string exists, -/// returns None. -static Optional<StringRef> findSimilarStr( - StringRef LHS, const std::vector<StringRef> &Candidates) { +/// returns std::nullopt. +static std::optional<StringRef> +findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) { // We need to check if `Candidates` has the exact case-insensitive string // because the Levenshtein distance match does not care about it. for (StringRef C : Candidates) { @@ -291,7 +272,7 @@ static Optional<StringRef> findSimilarStr( size_t Length = LHS.size(); size_t MaxDist = Length < 3 ? 
Length - 1 : Length / 3; - Optional<std::pair<StringRef, size_t>> SimilarStr = None; + std::optional<std::pair<StringRef, size_t>> SimilarStr; for (StringRef C : Candidates) { size_t CurDist = LHS.edit_distance(C, true); if (CurDist <= MaxDist) { @@ -308,7 +289,7 @@ static Optional<StringRef> findSimilarStr( if (SimilarStr) { return SimilarStr->first; } else { - return None; + return std::nullopt; } } @@ -456,7 +437,7 @@ void Preprocessor::SuggestTypoedDirective(const Token &Tok, if (LangOpts.C2x || LangOpts.CPlusPlus2b) Candidates.insert(Candidates.end(), {"elifdef", "elifndef"}); - if (Optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) { + if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) { // Directive cannot be coming from macro. assert(Tok.getLocation().isFileID()); CharSourceRange DirectiveRange = CharSourceRange::getCharRange( @@ -492,8 +473,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, // lookup pointer. assert(!SkippingExcludedConditionalBlock && "calling SkipExcludedConditionalBlock recursively"); - llvm::SaveAndRestore<bool> SARSkipping(SkippingExcludedConditionalBlock, - true); + llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true); ++NumSkipped; assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?"); @@ -856,7 +836,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, Tok.getLocation()); } -Module *Preprocessor::getModuleForLocation(SourceLocation Loc) { +Module *Preprocessor::getModuleForLocation(SourceLocation Loc, + bool AllowTextual) { if (!SourceMgr.isInMainFile(Loc)) { // Try to determine the module of the include directive. // FIXME: Look into directly passing the FileEntry from LookupFile instead. @@ -864,7 +845,7 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) { if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) { // The include comes from an included file. 
return HeaderInfo.getModuleMap() - .findModuleForHeader(EntryOfIncl) + .findModuleForHeader(EntryOfIncl, AllowTextual) .getModule(); } } @@ -879,7 +860,8 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) { const FileEntry * Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, SourceLocation Loc) { - Module *IncM = getModuleForLocation(IncLoc); + Module *IncM = getModuleForLocation( + IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes); // Walk up through the include stack, looking through textual headers of M // until we hit a non-textual header that we can #include. (We assume textual @@ -908,6 +890,10 @@ Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, continue; } + // Don't suggest explicitly excluded headers. + if (Header.getRole() == ModuleMap::ExcludedHeader) + continue; + // We'll suggest including textual headers below if they're // include-guarded. if (Header.getRole() & ModuleMap::TextualHeader) @@ -943,17 +929,18 @@ Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, return nullptr; } -Optional<FileEntryRef> Preprocessor::LookupFile( +OptionalFileEntryRef Preprocessor::LookupFile( SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, - bool *IsFrameworkFound, bool SkipCache) { + bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) { ConstSearchDirIterator CurDirLocal = nullptr; ConstSearchDirIterator &CurDir = CurDirArg ? 
*CurDirArg : CurDirLocal; - Module *RequestingModule = getModuleForLocation(FilenameLoc); + Module *RequestingModule = getModuleForLocation( + FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes); bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc); // If the header lookup mechanism may be relative to the current inclusion @@ -1007,7 +994,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile( // the include path until we find that file or run out of files. ConstSearchDirIterator TmpCurDir = CurDir; ConstSearchDirIterator TmpFromDir = nullptr; - while (Optional<FileEntryRef> FE = HeaderInfo.LookupFile( + while (OptionalFileEntryRef FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir, Includers, SearchPath, RelativePath, RequestingModule, SuggestedModule, /*IsMapped=*/nullptr, @@ -1025,10 +1012,10 @@ Optional<FileEntryRef> Preprocessor::LookupFile( } // Do a standard file entry lookup. - Optional<FileEntryRef> FE = HeaderInfo.LookupFile( + OptionalFileEntryRef FE = HeaderInfo.LookupFile( Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath, RelativePath, RequestingModule, SuggestedModule, IsMapped, - IsFrameworkFound, SkipCache, BuildSystemModule); + IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures); if (FE) { if (SuggestedModule && !LangOpts.AsmPreprocessor) HeaderInfo.getModuleMap().diagnoseHeaderInclusion( @@ -1043,7 +1030,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile( // headers on the #include stack and pass them to HeaderInfo. 
if (IsFileLexer()) { if ((CurFileEnt = CurPPLexer->getFileEntry())) { - if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader( + if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule, SuggestedModule)) { if (SuggestedModule && !LangOpts.AsmPreprocessor) @@ -1058,7 +1045,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile( for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) { if (IsFileLexer(ISEntry)) { if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) { - if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader( + if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule, SuggestedModule)) { if (SuggestedModule && !LangOpts.AsmPreprocessor) @@ -1072,7 +1059,7 @@ Optional<FileEntryRef> Preprocessor::LookupFile( } // Otherwise, we really couldn't find the file. - return None; + return std::nullopt; } //===----------------------------------------------------------------------===// @@ -1998,7 +1985,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } } -Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport( +OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport( ConstSearchDirIterator *CurDir, StringRef &Filename, SourceLocation FilenameLoc, CharSourceRange FilenameRange, const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, @@ -2006,24 +1993,26 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport( const FileEntry *LookupFromFile, StringRef &LookupFilename, SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, ModuleMap::KnownHeader &SuggestedModule, bool isAngled) { - Optional<FileEntryRef> File = LookupFile( - FilenameLoc, LookupFilename, - isAngled, LookupFrom, LookupFromFile, CurDir, + OptionalFileEntryRef File = LookupFile( + FilenameLoc, LookupFilename, isAngled, LookupFrom, 
LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, &IsFrameworkFound); if (File) return File; + // Give the clients a chance to silently skip this include. + if (Callbacks && Callbacks->FileNotFound(Filename)) + return std::nullopt; + if (SuppressIncludeNotFoundError) - return None; + return std::nullopt; // If the file could not be located and it was included via angle // brackets, we can attempt a lookup as though it were a quoted path to // provide the user with a possible fixit. if (isAngled) { - Optional<FileEntryRef> File = LookupFile( - FilenameLoc, LookupFilename, - false, LookupFrom, LookupFromFile, CurDir, + OptionalFileEntryRef File = LookupFile( + FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr); @@ -2052,9 +2041,9 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport( StringRef TypoCorrectionName = CorrectTypoFilename(Filename); StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename); - Optional<FileEntryRef> File = LookupFile( - FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom, LookupFromFile, - CurDir, Callbacks ? &SearchPath : nullptr, + OptionalFileEntryRef File = LookupFile( + FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom, + LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, Callbacks ? 
&RelativePath : nullptr, &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr); if (File) { @@ -2090,7 +2079,7 @@ Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport( << CacheEntry.Directory->getName(); } - return None; + return std::nullopt; } /// Handle either a #include-like directive or an import declaration that names @@ -2177,7 +2166,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( BackslashStyle = llvm::sys::path::Style::windows; } - Optional<FileEntryRef> File = LookupHeaderIncludeOrImport( + OptionalFileEntryRef File = LookupHeaderIncludeOrImport( &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok, IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile, LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled); @@ -2215,14 +2204,13 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( alreadyIncluded(*File)) Action = IncludeLimitReached; - bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule && - !isForModuleBuilding(SuggestedModule.getModule(), - getLangOpts().CurrentModule, - getLangOpts().ModuleName); - // FIXME: We do not have a good way to disambiguate C++ clang modules from // C++ standard modules (other than use/non-use of Header Units). Module *SM = SuggestedModule.getModule(); + + bool MaybeTranslateInclude = + Action == Enter && File && SM && !SM->isForBuilding(getLangOpts()); + // Maybe a usable Header Unit bool UsableHeaderUnit = false; if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) { @@ -2235,14 +2223,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( } } // Maybe a usable clang header module. - bool UsableHeaderModule = + bool UsableClangHeaderModule = (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM && !SM->isHeaderUnit(); // Determine whether we should try to import the module for this #include, if // there is one. 
Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). - if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) { + if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) { // If this include corresponds to a module but that module is // unavailable, diagnose the situation and bail out. // FIXME: Remove this; loadModule does the same check (but produces @@ -2281,11 +2269,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( if (Imported) { Action = Import; } else if (Imported.isMissingExpected()) { + markClangModuleAsAffecting( + static_cast<Module *>(Imported)->getTopLevelModule()); // We failed to find a submodule that we assumed would exist (because it // was in the directory of an umbrella header, for instance), but no // actual module containing it exists (because the umbrella header is // incomplete). Treat this as a textual inclusion. SuggestedModule = ModuleMap::KnownHeader(); + SM = nullptr; } else if (Imported.isConfigMismatch()) { // On a configuration mismatch, enter the header textually. We still know // that it's part of the corresponding module. @@ -2549,9 +2540,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // that behaves the same as the header would behave in a compilation using // that PCH, which means we should enter the submodule. We need to teach // the AST serialization layer to deal with the resulting AST. 
- if (getLangOpts().CompilingPCH && - isForModuleBuilding(SM, getLangOpts().CurrentModule, - getLangOpts().ModuleName)) + if (getLangOpts().CompilingPCH && SM->isForBuilding(getLangOpts())) return {ImportAction::None}; assert(!CurLexerSubmodule && "should not have marked this as a module yet"); diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index bd35689f18e7..aa411cfc5f2c 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -869,7 +869,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, /// to "!defined(X)" return X in IfNDefMacro. Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { - SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true); + SaveAndRestore PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list // in which case a directive is undefined behavior. We want macros to be able diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 36d3aa59bb2f..66168467ecf5 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/Path.h" +#include <optional> using namespace clang; @@ -75,7 +76,7 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir, MaxIncludeStackDepth = IncludeMacroStack.size(); // Get the MemoryBuffer for this FID, if it fails, we fail. 
- llvm::Optional<llvm::MemoryBufferRef> InputFile = + std::optional<llvm::MemoryBufferRef> InputFile = getSourceManager().getBufferOrNone(FID, Loc); if (!InputFile) { SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID); @@ -94,8 +95,8 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir, Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); if (getPreprocessorOpts().DependencyDirectivesForFile && FID != PredefinesFileID) { - if (Optional<FileEntryRef> File = SourceMgr.getFileEntryRefForID(FID)) { - if (Optional<ArrayRef<dependency_directives_scan::Directive>> + if (OptionalFileEntryRef File = SourceMgr.getFileEntryRefForID(FID)) { + if (std::optional<ArrayRef<dependency_directives_scan::Directive>> DepDirectives = getPreprocessorOpts().DependencyDirectivesForFile(*File)) { TheLexer->DepDirectives = *DepDirectives; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index f3be2107f985..bbc271e5611e 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -37,8 +37,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -54,6 +52,7 @@ #include <cstddef> #include <cstring> #include <ctime> +#include <optional> #include <string> #include <tuple> #include <utility> @@ -285,7 +284,8 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) { // Dump module macros. llvm::DenseSet<ModuleMacro*> Active; - for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None) + for (auto *MM : + State ? 
State->getActiveModuleMacros(*this, II) : std::nullopt) Active.insert(MM); llvm::DenseSet<ModuleMacro*> Visited; llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf.begin(), Leaf.end()); @@ -371,6 +371,8 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__has_feature = RegisterBuiltinMacro(*this, "__has_feature"); Ident__has_extension = RegisterBuiltinMacro(*this, "__has_extension"); Ident__has_builtin = RegisterBuiltinMacro(*this, "__has_builtin"); + Ident__has_constexpr_builtin = + RegisterBuiltinMacro(*this, "__has_constexpr_builtin"); Ident__has_attribute = RegisterBuiltinMacro(*this, "__has_attribute"); if (!getLangOpts().CPlusPlus) Ident__has_c_attribute = RegisterBuiltinMacro(*this, "__has_c_attribute"); @@ -387,6 +389,10 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__is_target_os = RegisterBuiltinMacro(*this, "__is_target_os"); Ident__is_target_environment = RegisterBuiltinMacro(*this, "__is_target_environment"); + Ident__is_target_variant_os = + RegisterBuiltinMacro(*this, "__is_target_variant_os"); + Ident__is_target_variant_environment = + RegisterBuiltinMacro(*this, "__is_target_variant_environment"); // Modules. Ident__building_module = RegisterBuiltinMacro(*this, "__building_module"); @@ -1081,8 +1087,15 @@ void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() { /// the identifier tokens inserted. 
static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, Preprocessor &PP) { - time_t TT = time(nullptr); - struct tm *TM = localtime(&TT); + time_t TT; + std::tm *TM; + if (PP.getPreprocessorOpts().SourceDateEpoch) { + TT = *PP.getPreprocessorOpts().SourceDateEpoch; + TM = std::gmtime(&TT); + } else { + TT = std::time(nullptr); + TM = std::localtime(&TT); + } static const char * const Months[] = { "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" @@ -1091,8 +1104,11 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, { SmallString<32> TmpBuffer; llvm::raw_svector_ostream TmpStream(TmpBuffer); - TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon], - TM->tm_mday, TM->tm_year + 1900); + if (TM) + TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon], + TM->tm_mday, TM->tm_year + 1900); + else + TmpStream << "??? ?? ????"; Token TmpTok; TmpTok.startToken(); PP.CreateString(TmpStream.str(), TmpTok); @@ -1102,8 +1118,11 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, { SmallString<32> TmpBuffer; llvm::raw_svector_ostream TmpStream(TmpBuffer); - TmpStream << llvm::format("\"%02d:%02d:%02d\"", - TM->tm_hour, TM->tm_min, TM->tm_sec); + if (TM) + TmpStream << llvm::format("\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, + TM->tm_sec); + else + TmpStream << "??:??:??"; Token TmpTok; TmpTok.startToken(); PP.CreateString(TmpStream.str(), TmpTok); @@ -1230,7 +1249,7 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, return false; // Search include directories. 
- Optional<FileEntryRef> File = + OptionalFileEntryRef File = PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); @@ -1282,7 +1301,7 @@ static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS, unsigned ParenDepth = 1; SourceLocation LParenLoc = Tok.getLocation(); - llvm::Optional<int> Result; + std::optional<int> Result; Token ResultTok; bool SuppressDiagnostic = false; @@ -1326,10 +1345,10 @@ already_lexed: // The last ')' has been reached; return the value if one found or // a diagnostic and a dummy value. if (Result) { - OS << Result.value(); + OS << *Result; // For strict conformance to __has_cpp_attribute rules, use 'L' // suffix for dated literals. - if (Result.value() > 1) + if (*Result > 1) OS << 'L'; } else { OS << 0; @@ -1428,9 +1447,47 @@ static bool isTargetEnvironment(const TargetInfo &TI, const IdentifierInfo *II) { std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str(); llvm::Triple Env(EnvName); + // The unknown environment is matched only if + // '__is_target_environment(unknown)' is used. + if (Env.getEnvironment() == llvm::Triple::UnknownEnvironment && + EnvName != "---unknown") + return false; return TI.getTriple().getEnvironment() == Env.getEnvironment(); } +/// Implements the __is_target_variant_os builtin macro. +static bool isTargetVariantOS(const TargetInfo &TI, const IdentifierInfo *II) { + if (TI.getTriple().isOSDarwin()) { + const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple(); + if (!VariantTriple) + return false; + + std::string OSName = + (llvm::Twine("unknown-unknown-") + II->getName().lower()).str(); + llvm::Triple OS(OSName); + if (OS.getOS() == llvm::Triple::Darwin) { + // Darwin matches macos, ios, etc. + return VariantTriple->isOSDarwin(); + } + return VariantTriple->getOS() == OS.getOS(); + } + return false; +} + +/// Implements the __is_target_variant_environment builtin macro. 
+static bool isTargetVariantEnvironment(const TargetInfo &TI, + const IdentifierInfo *II) { + if (TI.getTriple().isOSDarwin()) { + const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple(); + if (!VariantTriple) + return false; + std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str(); + llvm::Triple Env(EnvName); + return VariantTriple->getEnvironment() == Env.getEnvironment(); + } + return false; +} + /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// as a builtin macro, handle it and return the next token as 'Tok'. void Preprocessor::ExpandBuiltinMacro(Token &Tok) { @@ -1556,22 +1613,24 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Diag(Tok.getLocation(), diag::warn_pp_date_time); // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. - - // Get the file that we are lexing out of. If we're currently lexing from - // a macro, dig into the include stack. - const FileEntry *CurFile = nullptr; - PreprocessorLexer *TheLexer = getCurrentFileLexer(); - - if (TheLexer) - CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID()); - const char *Result; - if (CurFile) { - time_t TT = CurFile->getModificationTime(); - struct tm *TM = localtime(&TT); + if (getPreprocessorOpts().SourceDateEpoch) { + time_t TT = *getPreprocessorOpts().SourceDateEpoch; + std::tm *TM = std::gmtime(&TT); Result = asctime(TM); } else { - Result = "??? ??? ?? ??:??:?? ????\n"; + // Get the file that we are lexing out of. If we're currently lexing from + // a macro, dig into the include stack. + const FileEntry *CurFile = nullptr; + if (PreprocessorLexer *TheLexer = getCurrentFileLexer()) + CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID()); + if (CurFile) { + time_t TT = CurFile->getModificationTime(); + struct tm *TM = localtime(&TT); + Result = asctime(TM); + } else { + Result = "??? ??? ?? ??:??:?? 
????\n"; + } } // Surround the string with " and strip the trailing newline. OS << '"' << StringRef(Result).drop_back() << '"'; @@ -1663,7 +1722,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { .Case("__array_rank", true) .Case("__array_extent", true) .Case("__reference_binds_to_temporary", true) - .Case("__underlying_type", true) +#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) .Case("__" #Trait, true) +#include "clang/Basic/TransformTypeTraits.def" .Default(false); } else { return llvm::StringSwitch<bool>(II->getName()) @@ -1677,9 +1737,23 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { .Case("__is_target_vendor", true) .Case("__is_target_os", true) .Case("__is_target_environment", true) + .Case("__is_target_variant_os", true) + .Case("__is_target_variant_environment", true) .Default(false); } }); + } else if (II == Ident__has_constexpr_builtin) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + if (!II) + return false; + unsigned BuiltinOp = II->getBuiltinID(); + return BuiltinOp != 0 && + this->getBuiltinInfo().isConstantEvaluated(BuiltinOp); + }); } else if (II == Ident__is_identifier) { EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [](Token &Tok, bool &HasLexedNextToken) -> int { @@ -1877,6 +1951,22 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Tok, *this, diag::err_feature_check_malformed); return II && isTargetEnvironment(getTargetInfo(), II); }); + } else if (II == Ident__is_target_variant_os) { + EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetVariantOS(getTargetInfo(), II); + }); + } else if (II == Ident__is_target_variant_environment) { + 
EvaluateFeatureLikeBuiltinMacro( + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { + IdentifierInfo *II = ExpectFeatureIdentifierInfo( + Tok, *this, diag::err_feature_check_malformed); + return II && isTargetVariantEnvironment(getTargetInfo(), II); + }); } else { llvm_unreachable("Unknown identifier!"); } diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index fb4f2dc45758..4da9d1603770 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -48,6 +48,7 @@ #include <cstddef> #include <cstdint> #include <limits> +#include <optional> #include <string> #include <utility> #include <vector> @@ -527,7 +528,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { return; // Search include directories for this file. - Optional<FileEntryRef> File = + OptionalFileEntryRef File = LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); if (!File) { @@ -1043,7 +1044,7 @@ struct PragmaDebugHandler : public PragmaHandler { Token Tok; PP.LexUnexpandedToken(Tok); if (Tok.isNot(tok::identifier)) { - PP.Diag(Tok, diag::warn_pragma_diagnostic_invalid); + PP.Diag(Tok, diag::warn_pragma_debug_missing_command); return; } IdentifierInfo *II = Tok.getIdentifierInfo(); @@ -1181,6 +1182,23 @@ struct PragmaDebugHandler : public PragmaHandler { PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command) << DumpII->getName(); } + } else if (II->isStr("sloc_usage")) { + // An optional integer literal argument specifies the number of files to + // specifically report information about. 
+ std::optional<unsigned> MaxNotes; + Token ArgToken; + PP.Lex(ArgToken); + uint64_t Value; + if (ArgToken.is(tok::numeric_constant) && + PP.parseSimpleIntegerLiteral(ArgToken, Value)) { + MaxNotes = Value; + } else if (ArgToken.isNot(tok::eod)) { + PP.Diag(ArgToken, diag::warn_pragma_debug_unexpected_argument); + } + + PP.Diag(Tok, diag::remark_sloc_usage); + PP.getSourceManager().noteSLocAddressSpaceUsage(PP.getDiagnostics(), + MaxNotes); } else { PP.Diag(Tok, diag::warn_pragma_debug_unexpected_command) << II->getName(); @@ -1940,6 +1958,15 @@ struct PragmaRegionHandler : public PragmaHandler { } }; +/// "\#pragma managed" +/// "\#pragma managed(...)" +/// "\#pragma unmanaged" +/// MSVC ignores this pragma when not compiling using /clr, which clang doesn't +/// support. We parse it and ignore it to avoid -Wunknown-pragma warnings. +struct PragmaManagedHandler : public EmptyPragmaHandler { + PragmaManagedHandler(const char *pragma) : EmptyPragmaHandler(pragma) {} +}; + /// This handles parsing pragmas that take a macro name and optional message static IdentifierInfo *HandleMacroAnnotationPragma(Preprocessor &PP, Token &Tok, const char *Pragma, @@ -2112,6 +2139,8 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler(new PragmaIncludeAliasHandler()); AddPragmaHandler(new PragmaHdrstopHandler()); AddPragmaHandler(new PragmaSystemHeaderHandler()); + AddPragmaHandler(new PragmaManagedHandler("managed")); + AddPragmaHandler(new PragmaManagedHandler("unmanaged")); } // Pragmas added by plugins diff --git a/clang/lib/Lex/PreprocessingRecord.cpp b/clang/lib/Lex/PreprocessingRecord.cpp index 2146a7c04217..85eb57f61611 100644 --- a/clang/lib/Lex/PreprocessingRecord.cpp +++ b/clang/lib/Lex/PreprocessingRecord.cpp @@ -20,7 +20,6 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Token.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" @@ -31,6 
+30,7 @@ #include <cstddef> #include <cstring> #include <iterator> +#include <optional> #include <utility> #include <vector> @@ -42,7 +42,7 @@ ExternalPreprocessingRecordSource::~ExternalPreprocessingRecordSource() = InclusionDirective::InclusionDirective(PreprocessingRecord &PPRec, InclusionKind Kind, StringRef FileName, bool InQuotes, bool ImportedModule, - Optional<FileEntryRef> File, + OptionalFileEntryRef File, SourceRange Range) : PreprocessingDirective(InclusionDirectiveKind, Range), InQuotes(InQuotes), Kind(Kind), ImportedModule(ImportedModule), File(File) { @@ -112,10 +112,9 @@ bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) { // See if the external source can see if the entity is in the file without // deserializing it. - Optional<bool> IsInFile = - ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID); - if (IsInFile) - return IsInFile.value(); + if (std::optional<bool> IsInFile = + ExternalSource->isPreprocessedEntityInFileID(LoadedIndex, FID)) + return *IsInFile; // The external source did not provide a definite answer, go and deserialize // the entity to check it. 
@@ -476,15 +475,9 @@ void PreprocessingRecord::MacroUndefined(const Token &Id, } void PreprocessingRecord::InclusionDirective( - SourceLocation HashLoc, - const Token &IncludeTok, - StringRef FileName, - bool IsAngled, - CharSourceRange FilenameRange, - Optional<FileEntryRef> File, - StringRef SearchPath, - StringRef RelativePath, - const Module *Imported, + SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, + StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { InclusionDirective::InclusionKind Kind = InclusionDirective::Include; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 5310db3c882b..fe9adb5685e3 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,7 +58,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -66,6 +65,7 @@ #include <algorithm> #include <cassert> #include <memory> +#include <optional> #include <string> #include <utility> #include <vector> @@ -166,12 +166,6 @@ Preprocessor::~Preprocessor() { IncludeMacroStack.clear(); - // Destroy any macro definitions. - while (MacroInfoChain *I = MIChainHead) { - MIChainHead = I->Next; - I->~MacroInfoChain(); - } - // Free any cached macro expanders. // This populates MacroArgCache, so all TokenLexers need to be destroyed // before the code below that frees up the MacroArgCache list. @@ -406,7 +400,7 @@ bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, assert(!CodeCompletionFile && "Already set"); // Load the actual file's contents. 
- Optional<llvm::MemoryBufferRef> Buffer = + std::optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getMemoryBufferForFileOrNone(File); if (!Buffer) return true; @@ -535,6 +529,13 @@ Module *Preprocessor::getCurrentModule() { return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); } +Module *Preprocessor::getCurrentModuleImplementation() { + if (!getLangOpts().isCompilingModuleImplementation()) + return nullptr; + + return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName); +} + //===----------------------------------------------------------------------===// // Preprocessor Initialization Methods //===----------------------------------------------------------------------===// @@ -580,7 +581,7 @@ void Preprocessor::EnterMainSourceFile() { if (!PPOpts->PCHThroughHeader.empty()) { // Lookup and save the FileID for the through header. If it isn't found // in the search path, it's a fatal error. - Optional<FileEntryRef> File = LookupFile( + OptionalFileEntryRef File = LookupFile( SourceLocation(), PPOpts->PCHThroughHeader, /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, @@ -773,29 +774,6 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); } -/// Returns a diagnostic message kind for reporting a future keyword as -/// appropriate for the identifier and specified language. 
-static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, - const LangOptions &LangOpts) { - assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); - - if (LangOpts.CPlusPlus) - return llvm::StringSwitch<diag::kind>(II.getName()) -#define CXX11_KEYWORD(NAME, FLAGS) \ - .Case(#NAME, diag::warn_cxx11_keyword) -#define CXX20_KEYWORD(NAME, FLAGS) \ - .Case(#NAME, diag::warn_cxx20_keyword) -#include "clang/Basic/TokenKinds.def" - // char8_t is not modeled as a CXX20_KEYWORD because it's not - // unconditionally enabled in C++20 mode. (It can be disabled - // by -fno-char8_t.) - .Case("char8_t", diag::warn_cxx20_keyword) - ; - - llvm_unreachable( - "Keyword not known to come from a newer Standard or proposed Standard"); -} - void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { assert(II.isOutOfDate() && "not out of date"); getExternalSource()->updateOutOfDateIdentifier(II); @@ -867,7 +845,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { // FIXME: This warning is disabled in cases where it shouldn't be, like // "#define constexpr constexpr", "int constexpr;" if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { - Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) + Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts())) << II.getName(); // Don't diagnose this keyword again in this translation unit. 
II.setIsFutureCompatKeyword(false); @@ -894,7 +872,7 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { (getLangOpts().Modules || getLangOpts().DebuggerSupport) && CurLexerKind != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); - ModuleImportPath.clear(); + NamedModuleImportPath.clear(); ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } @@ -939,57 +917,57 @@ void Preprocessor::Lex(Token &Result) { Result.setIdentifierInfo(nullptr); } - // Update ImportSeqState to track our position within a C++20 import-seq + // Update StdCXXImportSeqState to track our position within a C++20 import-seq // if this token is being produced as a result of phase 4 of translation. // Update TrackGMFState to decide if we are currently in a Global Module - // Fragment. GMF state updates should precede ImportSeq ones, since GMF state - // depends on the prevailing ImportSeq state in two cases. + // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state + // depends on the prevailing StdCXXImportSeq state in two cases. if (getLangOpts().CPlusPlusModules && LexLevel == 1 && !Result.getFlag(Token::IsReinjected)) { switch (Result.getKind()) { case tok::l_paren: case tok::l_square: case tok::l_brace: - ImportSeqState.handleOpenBracket(); + StdCXXImportSeqState.handleOpenBracket(); break; case tok::r_paren: case tok::r_square: - ImportSeqState.handleCloseBracket(); + StdCXXImportSeqState.handleCloseBracket(); break; case tok::r_brace: - ImportSeqState.handleCloseBrace(); + StdCXXImportSeqState.handleCloseBrace(); break; // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. 
case tok::annot_module_include: case tok::semi: TrackGMFState.handleSemi(); - ImportSeqState.handleSemi(); + StdCXXImportSeqState.handleSemi(); break; case tok::header_name: case tok::annot_header_unit: - ImportSeqState.handleHeaderName(); + StdCXXImportSeqState.handleHeaderName(); break; case tok::kw_export: TrackGMFState.handleExport(); - ImportSeqState.handleExport(); + StdCXXImportSeqState.handleExport(); break; case tok::identifier: if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq()); - ImportSeqState.handleImport(); - if (ImportSeqState.afterImportSeq()) { + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); + if (StdCXXImportSeqState.afterImportSeq()) { ModuleImportLoc = Result.getLocation(); - ModuleImportPath.clear(); + NamedModuleImportPath.clear(); ModuleImportExpectsIdentifier = true; CurLexerKind = CLK_LexAfterModuleImport; } break; } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { - TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq()); + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: TrackGMFState.handleMisc(); - ImportSeqState.handleMisc(); + StdCXXImportSeqState.handleMisc(); break; } } @@ -1170,7 +1148,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // For now, we only support header-name imports in C++20 mode. // FIXME: Should we allow this in all language modes that support an import // declaration as an extension? 
- if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { + if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { if (LexHeaderName(Result)) return true; } else { @@ -1232,7 +1210,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { Suffix.back().setLocation(SemiLoc); Suffix.back().setAnnotationEndLoc(SemiLoc); Suffix.back().setAnnotationValue(Action.ModuleForHeader); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ImportAction::ModuleImport: case ImportAction::HeaderUnitImport: @@ -1266,7 +1244,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { // We expected to see an identifier here, and we did; continue handling // identifiers. - ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), + NamedModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); ModuleImportExpectsIdentifier = false; CurLexerKind = CLK_LexAfterModuleImport; @@ -1283,7 +1261,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { } // If we didn't recognize a module name at all, this is not a (valid) import. - if (ModuleImportPath.empty() || Result.is(tok::eof)) + if (NamedModuleImportPath.empty() || Result.is(tok::eof)) return true; // Consume the pp-import-suffix and expand any macros in it now, if we're not @@ -1306,28 +1284,28 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // FIXME: Is this the right level to be performing this transformation? 
std::string FlatModuleName; if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { - for (auto &Piece : ModuleImportPath) { + for (auto &Piece : NamedModuleImportPath) { if (!FlatModuleName.empty()) FlatModuleName += "."; FlatModuleName += Piece.first->getName(); } - SourceLocation FirstPathLoc = ModuleImportPath[0].second; - ModuleImportPath.clear(); - ModuleImportPath.push_back( + SourceLocation FirstPathLoc = NamedModuleImportPath[0].second; + NamedModuleImportPath.clear(); + NamedModuleImportPath.push_back( std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); } Module *Imported = nullptr; if (getLangOpts().Modules) { Imported = TheModuleLoader.loadModule(ModuleImportLoc, - ModuleImportPath, + NamedModuleImportPath, Module::Hidden, /*IsInclusionDirective=*/false); if (Imported) makeModuleVisible(Imported, SemiLoc); } if (Callbacks) - Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); + Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); if (!Suffix.empty()) { EnterTokens(Suffix); diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index f6b005d9e19c..1b3201bd805b 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -240,7 +240,7 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, // it as an identifier. if (!PrevTok.hasUDSuffix()) return false; - LLVM_FALLTHROUGH; + [[fallthrough]]; case tok::identifier: // id+id or id+number or id+L"foo". // id+'.'... will not append. 
if (Tok.is(tok::numeric_constant)) diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index efda6d0046fa..c6968b9f417e 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -25,11 +25,13 @@ #include "clang/Lex/Token.h" #include "clang/Lex/VariadicMacroSupport.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include <cassert> #include <cstring> +#include <optional> using namespace clang; @@ -203,7 +205,7 @@ void TokenLexer::stringifyVAOPTContents( assert(CurTokenIdx != 0 && "Can not have __VAOPT__ contents begin with a ##"); Token &LHS = VAOPTTokens[CurTokenIdx - 1]; - pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens), + pasteTokens(LHS, llvm::ArrayRef(VAOPTTokens, NumVAOptTokens), CurTokenIdx); // Replace the token prior to the first ## in this iteration. ConcatenatedVAOPTResultToks.back() = LHS; @@ -247,7 +249,7 @@ void TokenLexer::ExpandFunctionArguments() { // we install the newly expanded sequence as the new 'Tokens' list. bool MadeChange = false; - Optional<bool> CalledWithVariadicArguments; + std::optional<bool> CalledWithVariadicArguments; VAOptExpansionContext VCtx(PP); @@ -721,7 +723,7 @@ bool TokenLexer::Lex(Token &Tok) { } bool TokenLexer::pasteTokens(Token &Tok) { - return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx); + return pasteTokens(Tok, llvm::ArrayRef(Tokens, NumTokens), CurTokenIdx); } /// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ## @@ -984,65 +986,71 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { /// \arg begin_tokens will be updated to a position past all the found /// consecutive tokens. 
static void updateConsecutiveMacroArgTokens(SourceManager &SM, - SourceLocation InstLoc, + SourceLocation ExpandLoc, Token *&begin_tokens, Token * end_tokens) { - assert(begin_tokens < end_tokens); - - SourceLocation FirstLoc = begin_tokens->getLocation(); - SourceLocation CurLoc = FirstLoc; - - // Compare the source location offset of tokens and group together tokens that - // are close, even if their locations point to different FileIDs. e.g. - // - // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) - // ^ ^ - // |bar foo cake| (one SLocEntry chunk for all tokens) - // - // we can perform this "merge" since the token's spelling location depends - // on the relative offset. - - Token *NextTok = begin_tokens + 1; - for (; NextTok < end_tokens; ++NextTok) { - SourceLocation NextLoc = NextTok->getLocation(); - if (CurLoc.isFileID() != NextLoc.isFileID()) - break; // Token from different kind of FileID. - - SourceLocation::IntTy RelOffs; - if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) - break; // Token from different local/loaded location. - // Check that token is not before the previous token or more than 50 - // "characters" away. - if (RelOffs < 0 || RelOffs > 50) - break; - - if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc)) - break; // Token from a different macro. - - CurLoc = NextLoc; + assert(begin_tokens + 1 < end_tokens); + SourceLocation BeginLoc = begin_tokens->getLocation(); + llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens); + llvm::MutableArrayRef<Token> Partition; + + auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable { + // The maximum distance between two consecutive tokens in a partition. + // This is an important trick to avoid using too much SourceLocation address + // space! 
+ static constexpr SourceLocation::IntTy MaxDistance = 50; + auto Distance = Loc.getRawEncoding() - Last.getRawEncoding(); + Last = Loc; + return Distance <= MaxDistance; + }; + + // Partition the tokens by their FileID. + // This is a hot function, and calling getFileID can be expensive, the + // implementation is optimized by reducing the number of getFileID. + if (BeginLoc.isFileID()) { + // Consecutive tokens not written in macros must be from the same file. + // (Neither #include nor eof can occur inside a macro argument.) + Partition = All.take_while([&](const Token &T) { + return T.getLocation().isFileID() && NearLast(T.getLocation()); + }); + } else { + // Call getFileID once to calculate the bounds, and use the cheaper + // sourcelocation-against-bounds comparison. + FileID BeginFID = SM.getFileID(BeginLoc); + SourceLocation Limit = + SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID)); + Partition = All.take_while([&](const Token &T) { + return T.getLocation() >= BeginLoc && T.getLocation() < Limit && + NearLast(T.getLocation()); + }); } + assert(!Partition.empty()); // For the consecutive tokens, find the length of the SLocEntry to contain // all of them. - Token &LastConsecutiveTok = *(NextTok-1); - SourceLocation::IntTy LastRelOffs = 0; - SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), - &LastRelOffs); SourceLocation::UIntTy FullLength = - LastRelOffs + LastConsecutiveTok.getLength(); - + Partition.back().getEndLoc().getRawEncoding() - + Partition.front().getLocation().getRawEncoding(); // Create a macro expansion SLocEntry that will "contain" all of the tokens. 
SourceLocation Expansion = - SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); - + SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength); + +#ifdef EXPENSIVE_CHECKS + assert(llvm::all_of(Partition.drop_front(), + [&SM, ID = SM.getFileID(Partition.front().getLocation())]( + const Token &T) { + return ID == SM.getFileID(T.getLocation()); + }) && + "Must have the same FIleID!"); +#endif // Change the location of the tokens from the spelling location to the new // expanded location. - for (; begin_tokens < NextTok; ++begin_tokens) { - Token &Tok = *begin_tokens; - SourceLocation::IntTy RelOffs = 0; - SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); - Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); + for (Token& T : Partition) { + SourceLocation::IntTy RelativeOffset = + T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding(); + T.setLocation(Expansion.getLocWithOffset(RelativeOffset)); } + begin_tokens = &Partition.back() + 1; } /// Creates SLocEntries and updates the locations of macro argument @@ -1055,7 +1063,7 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, Token *end_tokens) { SourceManager &SM = PP.getSourceManager(); - SourceLocation InstLoc = + SourceLocation ExpandLoc = getExpansionLocForMacroDefLoc(ArgIdSpellLoc); while (begin_tokens < end_tokens) { @@ -1063,12 +1071,12 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, if (end_tokens - begin_tokens == 1) { Token &Tok = *begin_tokens; Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), - InstLoc, + ExpandLoc, Tok.getLength())); return; } - updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); + updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens); } } diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h index e79a85bc72b7..5316d2540b76 100644 --- a/clang/lib/Lex/UnicodeCharSets.h +++ b/clang/lib/Lex/UnicodeCharSets.h @@ -10,7 +10,7 
@@ #include "llvm/Support/UnicodeCharRanges.h" -// Unicode 14 XID_Start +// Unicode 15.0 XID_Start static const llvm::sys::UnicodeCharRange XIDStartRanges[] = { {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA}, {0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6}, @@ -170,69 +170,72 @@ static const llvm::sys::UnicodeCharRange XIDStartRanges[] = { {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, - {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288}, - {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, - {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, - {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, - {0x11335, 0x11339}, {0x1133D, 0x1133D}, {0x11350, 0x11350}, - {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, - {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, - {0x114C7, 0x114C7}, {0x11580, 0x115AE}, {0x115D8, 0x115DB}, - {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, - {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11740, 0x11746}, - {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, - {0x11909, 0x11909}, {0x1190C, 0x11913}, {0x11915, 0x11916}, - {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, - {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, - {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32}, - {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, - {0x11A9D, 0x11A9D}, {0x11AB0, 0x11AF8}, {0x11C00, 0x11C08}, - {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F}, - {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, - {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, - {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, - {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, {0x12400, 0x1246E}, - {0x12480, 0x12543}, {0x12F90, 0x12FF0}, {0x13000, 0x1342E}, - {0x14400, 0x14646}, {0x16800, 
0x16A38}, {0x16A40, 0x16A5E}, - {0x16A70, 0x16ABE}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, - {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, - {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, - {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, - {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, - {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, - {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, - {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, - {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, - {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, - {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, - {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, - {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, - {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, - {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, - {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA}, - {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, - {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, - {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1DF00, 0x1DF1E}, + {0x11213, 0x1122B}, {0x1123F, 0x11240}, {0x11280, 0x11286}, + {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, + {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, + {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, + {0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133D, 0x1133D}, + {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, + {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, + {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE}, + {0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, + {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, + {0x11740, 0x11746}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, + {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913}, + {0x11915, 0x11916}, {0x11918, 0x1192F}, 
{0x1193F, 0x1193F}, + {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, + {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, + {0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, + {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AB0, 0x11AF8}, + {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, + {0x11C72, 0x11C8F}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, + {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, + {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, + {0x11EE0, 0x11EF2}, {0x11F02, 0x11F02}, {0x11F04, 0x11F10}, + {0x11F12, 0x11F33}, {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, + {0x12400, 0x1246E}, {0x12480, 0x12543}, {0x12F90, 0x12FF0}, + {0x13000, 0x1342F}, {0x13441, 0x13446}, {0x14400, 0x14646}, + {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A70, 0x16ABE}, + {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, + {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, + {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, + {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, + {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3}, + {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122}, + {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155}, + {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, + {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, + {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, + {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, + {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, + {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, + {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, + {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, + {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, + {0x1D6DC, 0x1D6FA}, {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, + {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, + {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 
0x1D7CB}, + {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, {0x1E030, 0x1E06D}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, {0x1E14E, 0x1E14E}, - {0x1E290, 0x1E2AD}, {0x1E2C0, 0x1E2EB}, {0x1E7E0, 0x1E7E6}, - {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE}, - {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, - {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, - {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32}, - {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, - {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, - {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52}, - {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, - {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, - {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, - {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, - {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, - {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, - {0x20000, 0x2A6DF}, {0x2A700, 0x2B738}, {0x2B740, 0x2B81D}, - {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, - {0x30000, 0x3134A}}; - -// Unicode 14 XID_Continue, excluding XID_Start + {0x1E290, 0x1E2AD}, {0x1E2C0, 0x1E2EB}, {0x1E4D0, 0x1E4EB}, + {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, + {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, + {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, + {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, + {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, + {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, + {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, + {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, + {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, + {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, + {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, + {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 
0x1EE89}, + {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, + {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DF}, {0x2A700, 0x2B739}, + {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, + {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, {0x31350, 0x323AF}}; + +// Unicode 15.0 XID_Continue, excluding XID_Start // The Unicode Property XID_Continue is a super set of XID_Start. // To save Space, the table below only contains the codepoints // that are not also in XID_Start. @@ -268,64 +271,65 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = { {0x0C66, 0x0C6F}, {0x0C81, 0x0C83}, {0x0CBC, 0x0CBC}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3}, {0x0CE6, 0x0CEF}, - {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C}, {0x0D3E, 0x0D44}, - {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, - {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, {0x0D81, 0x0D83}, - {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6}, - {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF3}, - {0x0E31, 0x0E31}, {0x0E33, 0x0E3A}, {0x0E47, 0x0E4E}, - {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1}, {0x0EB3, 0x0EBC}, - {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, {0x0F18, 0x0F19}, - {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, - {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, {0x0F71, 0x0F84}, - {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, {0x0F99, 0x0FBC}, - {0x0FC6, 0x0FC6}, {0x102B, 0x103E}, {0x1040, 0x1049}, - {0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, - {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, - {0x108F, 0x109D}, {0x135D, 0x135F}, {0x1369, 0x1371}, - {0x1712, 0x1715}, {0x1732, 0x1734}, {0x1752, 0x1753}, - {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, - {0x17E0, 0x17E9}, {0x180B, 0x180D}, {0x180F, 0x1819}, - {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, - {0x1946, 0x194F}, {0x19D0, 0x19DA}, {0x1A17, 0x1A1B}, - {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, - {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, {0x1ABF, 0x1ACE}, - {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, 
{0x1B50, 0x1B59}, - {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, - {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, - {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x1CD0, 0x1CD2}, - {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, - {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF}, {0x203F, 0x2040}, - {0x2054, 0x2054}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, - {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, - {0x2DE0, 0x2DFF}, {0x302A, 0x302F}, {0x3099, 0x309A}, - {0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D}, - {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, - {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, - {0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5}, - {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909}, - {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, - {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5}, - {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, - {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D}, - {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, - {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, - {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, - {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, - {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F}, - {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF9E, 0xFF9F}, - {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, - {0x104A0, 0x104A9}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, - {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, - {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10D30, 0x10D39}, - {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85}, - {0x11000, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x11070}, - {0x11073, 0x11074}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, - {0x110C2, 0x110C2}, {0x110F0, 0x110F9}, {0x11100, 0x11102}, - {0x11127, 0x11134}, {0x11136, 0x1113F}, {0x11145, 0x11146}, - {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, - {0x111C9, 0x111CC}, {0x111CE, 0x111D9}, {0x1122C, 0x11237}, - {0x1123E, 
0x1123E}, {0x112DF, 0x112EA}, {0x112F0, 0x112F9}, + {0x0CF3, 0x0CF3}, {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C}, + {0x0D3E, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, + {0x0D57, 0x0D57}, {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, + {0x0D81, 0x0D83}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, + {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, + {0x0DF2, 0x0DF3}, {0x0E31, 0x0E31}, {0x0E33, 0x0E3A}, + {0x0E47, 0x0E4E}, {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1}, + {0x0EB3, 0x0EBC}, {0x0EC8, 0x0ECE}, {0x0ED0, 0x0ED9}, + {0x0F18, 0x0F19}, {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, + {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, + {0x0F71, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, + {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x103E}, + {0x1040, 0x1049}, {0x1056, 0x1059}, {0x105E, 0x1060}, + {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, + {0x1082, 0x108D}, {0x108F, 0x109D}, {0x135D, 0x135F}, + {0x1369, 0x1371}, {0x1712, 0x1715}, {0x1732, 0x1734}, + {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, + {0x17DD, 0x17DD}, {0x17E0, 0x17E9}, {0x180B, 0x180D}, + {0x180F, 0x1819}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, + {0x1930, 0x193B}, {0x1946, 0x194F}, {0x19D0, 0x19DA}, + {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, + {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, + {0x1ABF, 0x1ACE}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, + {0x1B50, 0x1B59}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, + {0x1BA1, 0x1BAD}, {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3}, + {0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, + {0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, + {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF}, + {0x203F, 0x2040}, {0x2054, 0x2054}, {0x20D0, 0x20DC}, + {0x20E1, 0x20E1}, {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1}, + {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F}, + {0x3099, 0x309A}, {0xA620, 0xA629}, {0xA66F, 0xA66F}, + {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, + {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, + {0xA823, 0xA827}, {0xA82C, 0xA82C}, 
{0xA880, 0xA881}, + {0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, + {0xA8FF, 0xA909}, {0xA926, 0xA92D}, {0xA947, 0xA953}, + {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, + {0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, + {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, + {0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, + {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, + {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, + {0xABEC, 0xABED}, {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, + {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, + {0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, + {0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, + {0x10376, 0x1037A}, {0x104A0, 0x104A9}, {0x10A01, 0x10A03}, + {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, + {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, + {0x10D30, 0x10D39}, {0x10EAB, 0x10EAC}, {0x10EFD, 0x10EFF}, + {0x10F46, 0x10F50}, {0x10F82, 0x10F85}, {0x11000, 0x11002}, + {0x11038, 0x11046}, {0x11066, 0x11070}, {0x11073, 0x11074}, + {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x110C2, 0x110C2}, + {0x110F0, 0x110F9}, {0x11100, 0x11102}, {0x11127, 0x11134}, + {0x11136, 0x1113F}, {0x11145, 0x11146}, {0x11173, 0x11173}, + {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, + {0x111CE, 0x111D9}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, + {0x11241, 0x11241}, {0x112DF, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303}, {0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, @@ -344,21 +348,54 @@ static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = { {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45}, {0x11D47, 0x11D47}, {0x11D50, 0x11D59}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, - {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF6}, {0x16A60, 0x16A69}, - {0x16AC0, 0x16AC9}, {0x16AF0, 0x16AF4}, {0x16B30, 
0x16B36}, - {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, - {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, - {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, - {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, - {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, - {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, - {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, - {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, - {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, - {0x1E130, 0x1E136}, {0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, - {0x1E2EC, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, + {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF6}, {0x11F00, 0x11F01}, + {0x11F03, 0x11F03}, {0x11F34, 0x11F3A}, {0x11F3E, 0x11F42}, + {0x11F50, 0x11F59}, {0x13440, 0x13440}, {0x13447, 0x13455}, + {0x16A60, 0x16A69}, {0x16AC0, 0x16AC9}, {0x16AF0, 0x16AF4}, + {0x16B30, 0x16B36}, {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, + {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, + {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D}, + {0x1CF30, 0x1CF46}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, + {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, + {0x1D242, 0x1D244}, {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, + {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, + {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, + {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, + {0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F}, {0x1E130, 0x1E136}, + {0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2F9}, + {0x1E4EC, 0x1E4F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}}; +// Clang supports the "Mathematical notation profile" as an extension, +// as described in https://www.unicode.org/L2/L2022/22230-math-profile.pdf +// Math_Start +static const llvm::sys::UnicodeCharRange + MathematicalNotationProfileIDStartRanges[] = { 
+ {0x02202, 0x02202}, // ∂ + {0x02207, 0x02207}, // ∇ + {0x0221E, 0x0221E}, // ∞ + {0x1D6C1, 0x1D6C1}, // 𝛁 + {0x1D6DB, 0x1D6DB}, // 𝛛 + {0x1D6FB, 0x1D6FB}, // 𝛻 + {0x1D715, 0x1D715}, // 𝜕 + {0x1D735, 0x1D735}, // 𝜵 + {0x1D74F, 0x1D74F}, // 𝝏 + {0x1D76F, 0x1D76F}, // 𝝯 + {0x1D789, 0x1D789}, // 𝞉 + {0x1D7A9, 0x1D7A9}, // 𝞩 + {0x1D7C3, 0x1D7C3}, // 𝟃 +}; + +// Math_Continue +static const llvm::sys::UnicodeCharRange + MathematicalNotationProfileIDContinueRanges[] = { + {0x000B2, 0x000B3}, // ²-³ + {0x000B9, 0x000B9}, // ¹ + {0x02070, 0x02070}, // ⁰ + {0x02074, 0x0207E}, // ⁴-⁾ + {0x02080, 0x0208E}, // ₀-₎ +}; + +// C11 D.1, C++11 [charname.allowed] static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = { // 1 |