Diffstat (limited to 'lib/Lex/Preprocessor.cpp')
-rw-r--r--  lib/Lex/Preprocessor.cpp | 227
1 files changed, 28 insertions, 199 deletions
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 5160acf19e1f..6fe414b66414 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -53,10 +53,12 @@ Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
                            IdentifierInfoLookup* IILookup,
                            bool OwnsHeaders)
   : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()),
-    SourceMgr(SM), HeaderInfo(Headers), ExternalSource(0),
+    SourceMgr(SM),
+    HeaderInfo(Headers), ExternalSource(0),
     Identifiers(opts, IILookup), BuiltinInfo(Target), CodeComplete(0),
     CodeCompletionFile(0), SkipMainFilePreamble(0, true), CurPPLexer(0),
-    CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0) {
+    CurDirLookup(0), Callbacks(0), MacroArgCache(0), Record(0), MIChainHead(0),
+    MICache(0) {
   ScratchBuf = new ScratchBuffer(SourceMgr);
   CounterValue = 0; // __COUNTER__ starts at 0.
   OwnsHeaderSearch = OwnsHeaders;
@@ -106,23 +108,8 @@ Preprocessor::~Preprocessor() {
   }
 
   // Free any macro definitions.
-  for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
-       Macros.begin(), E = Macros.end(); I != E; ++I) {
-    // We don't need to free the MacroInfo objects directly. These
-    // will be released when the BumpPtrAllocator 'BP' object gets
-    // destroyed. We still need to run the dtor, however, to free
-    // memory alocated by MacroInfo.
-    I->second->Destroy();
-    I->first->setHasMacroDefinition(false);
-  }
-  for (std::vector<MacroInfo*>::iterator I = MICache.begin(),
-       E = MICache.end(); I != E; ++I) {
-    // We don't need to free the MacroInfo objects directly. These
-    // will be released when the BumpPtrAllocator 'BP' object gets
-    // destroyed. We still need to run the dtor, however, to free
-    // memory alocated by MacroInfo.
-    (*I)->Destroy();
-  }
+  for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)
+    I->MI.Destroy();
 
   // Free any cached macro expanders.
   for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
@@ -291,114 +278,6 @@ void Preprocessor::CodeCompleteNaturalLanguage() {
     CodeComplete->CodeCompleteNaturalLanguage();
 }
 
-//===----------------------------------------------------------------------===//
-// Token Spelling
-//===----------------------------------------------------------------------===//
-
-/// getSpelling() - Return the 'spelling' of this token. The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding. In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok,
-                                      const SourceManager &SourceMgr,
-                                      const LangOptions &Features,
-                                      bool *Invalid) {
-  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
-  // If this token contains nothing interesting, return it directly.
-  bool CharDataInvalid = false;
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
-                                                    &CharDataInvalid);
-  if (Invalid)
-    *Invalid = CharDataInvalid;
-  if (CharDataInvalid)
-    return std::string();
-
-  if (!Tok.needsCleaning())
-    return std::string(TokStart, TokStart+Tok.getLength());
-
-  std::string Result;
-  Result.reserve(Tok.getLength());
-
-  // Otherwise, hard case, relex the characters into the string.
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
-    Ptr += CharSize;
-  }
-  assert(Result.size() != unsigned(Tok.getLength()) &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-  return Result;
-}
-
-/// getSpelling() - Return the 'spelling' of this token. The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding. In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
-  return getSpelling(Tok, SourceMgr, Features, Invalid);
-}
-
-/// getSpelling - This method is used to get the spelling of a token into a
-/// preallocated buffer, instead of as an std::string. The caller is required
-/// to allocate enough space for the token, which is guaranteed to be at least
-/// Tok.getLength() bytes long. The actual length of the token is returned.
-///
-/// Note that this method may do two possible things: it may either fill in
-/// the buffer specified with characters, or it may *change the input pointer*
-/// to point to a constant buffer with the data already in it (avoiding a
-/// copy). The caller is not allowed to modify the returned buffer pointer
-/// if an internal buffer is returned.
-unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer, bool *Invalid) const {
-  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
-  // If this token is an identifier, just return the string from the identifier
-  // table, which is very quick.
-  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    Buffer = II->getNameStart();
-    return II->getLength();
-  }
-
-  // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = 0;
-
-  if (Tok.isLiteral())
-    TokStart = Tok.getLiteralData();
-
-  if (TokStart == 0) {
-    bool CharDataInvalid = false;
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
-    if (Invalid)
-      *Invalid = CharDataInvalid;
-    if (CharDataInvalid) {
-      Buffer = "";
-      return 0;
-    }
-  }
-
-  // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning()) {
-    Buffer = TokStart;
-    return Tok.getLength();
-  }
-
-  // Otherwise, hard case, relex the characters into the string.
-  char *OutBuf = const_cast<char*>(Buffer);
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
-    Ptr += CharSize;
-  }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-
-  return OutBuf-Buffer;
-}
 
 /// getSpelling - This method is used to get the spelling of a token into a
 /// SmallVector. Note that the returned StringRef may not point to the
@@ -406,9 +285,12 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
                                           llvm::SmallVectorImpl<char> &Buffer,
                                           bool *Invalid) const {
-  // Try the fast path.
-  if (const IdentifierInfo *II = Tok.getIdentifierInfo())
-    return II->getName();
+  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+  if (Tok.isNot(tok::raw_identifier)) {
+    // Try the fast path.
+    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
+      return II->getName();
+  }
 
   // Resize the buffer if we need to copy into it.
   if (Tok.needsCleaning())
@@ -434,71 +316,14 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
                                            InstantiationLoc, Len);
   Tok.setLocation(Loc);
 
-  // If this is a literal token, set the pointer data.
-  if (Tok.isLiteral())
+  // If this is a raw identifier or a literal token, set the pointer data.
+  if (Tok.is(tok::raw_identifier))
+    Tok.setRawIdentifierData(DestPtr);
+  else if (Tok.isLiteral())
     Tok.setLiteralData(DestPtr);
 }
 
-/// AdvanceToTokenCharacter - Given a location that specifies the start of a
-/// token, return a new location that specifies a character within the token.
-SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart,
-                                                     unsigned CharNo) {
-  // Figure out how many physical characters away the specified instantiation
-  // character is. This needs to take into consideration newlines and
-  // trigraphs.
-  bool Invalid = false;
-  const char *TokPtr = SourceMgr.getCharacterData(TokStart, &Invalid);
-
-  // If they request the first char of the token, we're trivially done.
-  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
-    return TokStart;
-
-  unsigned PhysOffset = 0;
-
-  // The usual case is that tokens don't contain anything interesting. Skip
-  // over the uninteresting characters. If a token only consists of simple
-  // chars, this method is extremely fast.
-  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
-    if (CharNo == 0)
-      return TokStart.getFileLocWithOffset(PhysOffset);
-    ++TokPtr, --CharNo, ++PhysOffset;
-  }
-
-  // If we have a character that may be a trigraph or escaped newline, use a
-  // lexer to parse it correctly.
-  for (; CharNo; --CharNo) {
-    unsigned Size;
-    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
-    TokPtr += Size;
-    PhysOffset += Size;
-  }
-
-  // Final detail: if we end up on an escaped newline, we want to return the
-  // location of the actual byte of the token. For example foo\<newline>bar
-  // advanced by 3 should return the location of b, not of \\. One compounding
-  // detail of this is that the escape may be made by a trigraph.
-  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
-    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
-
-  return TokStart.getFileLocWithOffset(PhysOffset);
-}
-
-SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
-                                                 unsigned Offset) {
-  if (Loc.isInvalid() || !Loc.isFileID())
-    return SourceLocation();
-
-  unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features);
-  if (Len > Offset)
-    Len = Len - Offset;
-  else
-    return Loc;
-
-  return AdvanceToTokenCharacter(Loc, Len);
-}
-
-
 //===----------------------------------------------------------------------===//
 // Preprocessor Initialization Methods
@@ -549,25 +374,29 @@ void Preprocessor::EndSourceFile() {
 // Lexer Event Handling.
 //===----------------------------------------------------------------------===//
 
-/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
-/// identifier information for the token and install it into the token.
-IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
-                                                   const char *BufPtr) const {
-  assert(Identifier.is(tok::identifier) && "Not an identifier!");
-  assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
+/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
+/// identifier information for the token and install it into the token,
+/// updating the token kind accordingly.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
+  assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!");
 
   // Look up this token, see if it is a macro, or if it is a language keyword.
   IdentifierInfo *II;
-  if (BufPtr && !Identifier.needsCleaning()) {
+  if (!Identifier.needsCleaning()) {
     // No cleaning needed, just use the characters from the lexed buffer.
-    II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
+    II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),
+                                           Identifier.getLength()));
   } else {
     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
    llvm::SmallString<64> IdentifierBuffer;
     llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
     II = getIdentifierInfo(CleanedStr);
   }
+
+  // Update the token info (identifier info and appropriate token kind).
   Identifier.setIdentifierInfo(II);
+  Identifier.setKind(II->getTokenID());
+
   return II;
 }
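
The net effect of this change is that identifier tokens now start out as tok::raw_identifier carrying a pointer to their spelling (set via setRawIdentifierData), and LookUpIdentifierInfo() both installs the IdentifierInfo and rewrites the token kind to tok::identifier or the matching keyword kind. As a rough illustration only, not code from this commit, a caller holding a Preprocessor PP and a freshly lexed Token Tok might resolve such a token as follows; the helper name ResolveRawIdentifier is hypothetical:

    #include <cassert>
    #include "clang/Lex/Preprocessor.h"
    using namespace clang;

    // Hypothetical helper: resolve a raw identifier token in place.
    static void ResolveRawIdentifier(Preprocessor &PP, Token &Tok) {
      if (Tok.isNot(tok::raw_identifier))
        return;  // Only raw identifiers carry raw spelling data.
      // Installs the IdentifierInfo and updates the token kind, per the
      // new single-argument LookUpIdentifierInfo() above.
      IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok);
      assert(II == Tok.getIdentifierInfo() && "token was not updated");
      assert(Tok.isNot(tok::raw_identifier) && "kind should have been rewritten");
    }

Callers that previously passed a buffer pointer as the second argument (the removed BufPtr parameter) no longer need to: the spelling now travels with the token via setRawIdentifierData()/getRawIdentifierData().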