vendor/clang/clang-trunk-r154661

author: Dimitry Andric <dim@FreeBSD.org> 2012-04-14 14:01:31 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2012-04-14 14:01:31 +0000
commit: dbe13110f59f48b4dbb7552b3ac2935acdeece7f (patch)
tree: be1815eb79b42ff482a8562b13c2dcbf0c5dcbee /lib/Lex/Lexer.cpp
parent: 9da628931ebf2609493570f87824ca22402cc65f (diff)
download: src-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.tar.gz
src-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.zip
1 files changed, 348 insertions, 90 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a98d889dbc98..535a852429b5 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -30,6 +30,7 @@
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cstring>
@@ -59,6 +60,8 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
 // Lexer Class Implementation
 //===----------------------------------------------------------------------===//
 
+void Lexer::anchor() { }
+
 void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
                       const char *BufEnd) {
   InitCharacterInfo();
@@ -114,7 +117,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
 Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
   : PreprocessorLexer(&PP, FID),
     FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
-    Features(PP.getLangOptions()) {
+    LangOpts(PP.getLangOpts()) {
 
   InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
             InputFile->getBufferEnd());
@@ -126,9 +129,9 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
 /// Lexer constructor - Create a new raw lexer object.  This object is only
 /// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
 /// range will outlive it, so it doesn't take ownership of it.
-Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
              const char *BufStart, const char *BufPtr, const char *BufEnd)
-  : FileLoc(fileloc), Features(features) {
+  : FileLoc(fileloc), LangOpts(langOpts) {
 
   InitLexer(BufStart, BufPtr, BufEnd);
 
@@ -140,8 +143,8 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
 /// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
 /// range will outlive it, so it doesn't take ownership of it.
 Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
-             const SourceManager &SM, const LangOptions &features)
-  : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) {
+             const SourceManager &SM, const LangOptions &langOpts)
+  : FileLoc(SM.getLocForStartOfFile(FID)), LangOpts(langOpts) {
 
   InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(),
             FromFile->getBufferEnd());
@@ -284,7 +287,7 @@ StringRef Lexer::getSpelling(SourceLocation loc,
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
 std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
-                               const LangOptions &Features, bool *Invalid) {
+                               const LangOptions &LangOpts, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
   
   // If this token contains nothing interesting, return it directly.
@@ -306,7 +309,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
   for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
        Ptr != End; ) {
     unsigned CharSize;
-    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
     Ptr += CharSize;
   }
   assert(Result.size() != unsigned(Tok.getLength()) &&
@@ -326,7 +329,7 @@ std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
 /// if an internal buffer is returned.
 unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 
                             const SourceManager &SourceMgr,
-                            const LangOptions &Features, bool *Invalid) {
+                            const LangOptions &LangOpts, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   const char *TokStart = 0;
@@ -366,7 +369,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
   for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
        Ptr != End; ) {
     unsigned CharSize;
-    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
     Ptr += CharSize;
   }
   assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
@@ -487,11 +490,11 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
  SourceLocation FileLoc = SM.getSpellingLoc(Loc);
  SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
  std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
- std::pair<FileID, unsigned> BeginFileLocInfo= SM.getDecomposedLoc(BeginFileLoc);
+ std::pair<FileID, unsigned> BeginFileLocInfo
+   = SM.getDecomposedLoc(BeginFileLoc);
  assert(FileLocInfo.first == BeginFileLocInfo.first &&
         FileLocInfo.second >= BeginFileLocInfo.second);
- return Loc.getLocWithOffset(SM.getDecomposedLoc(BeginFileLoc).second -
-                                 SM.getDecomposedLoc(FileLoc).second);
+ return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
 }
 
 namespace {
@@ -505,13 +508,13 @@ namespace {
 
 std::pair<unsigned, bool>
 Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
-                       const LangOptions &Features, unsigned MaxLines) {
+                       const LangOptions &LangOpts, unsigned MaxLines) {
   // Create a lexer starting at the beginning of the file. Note that we use a
   // "fake" file source location at offset 1 so that the lexer will track our
   // position within the file.
   const unsigned StartOffset = 1;
   SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
-  Lexer TheLexer(StartLoc, Features, Buffer->getBufferStart(),
+  Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
                  Buffer->getBufferStart(), Buffer->getBufferEnd());
   
   bool InPreprocessorDirective = false;
@@ -655,7 +658,7 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer,
 SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
                                               unsigned CharNo,
                                               const SourceManager &SM,
-                                              const LangOptions &Features) {
+                                              const LangOptions &LangOpts) {
   // Figure out how many physical characters away the specified expansion
   // character is.  This needs to take into consideration newlines and
   // trigraphs.
@@ -681,7 +684,7 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
   // lexer to parse it correctly.
   for (; CharNo; --CharNo) {
     unsigned Size;
-    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
     TokPtr += Size;
     PhysOffset += Size;
   }
@@ -713,19 +716,16 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
 /// a source location pointing to the last character in the token, etc.
 SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
                                           const SourceManager &SM,
-                                          const LangOptions &Features) {
+                                          const LangOptions &LangOpts) {
   if (Loc.isInvalid())
     return SourceLocation();
 
   if (Loc.isMacroID()) {
-    if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, Features))
+    if (Offset > 0 || !isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
       return SourceLocation(); // Points inside the macro expansion.
-
-    // Continue and find the location just after the macro expansion.
-    Loc = SM.getExpansionRange(Loc).second;
   }
 
-  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
+  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
   if (Len > Offset)
     Len = Len - Offset;
   else
@@ -738,7 +738,8 @@ SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
 /// token of the macro expansion.
 bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
                                       const SourceManager &SM,
-                                      const LangOptions &LangOpts) {
+                                      const LangOptions &LangOpts,
+                                      SourceLocation *MacroBegin) {
   assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
 
   std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);
@@ -749,17 +750,22 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
 
   SourceLocation expansionLoc =
     SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();
-  if (expansionLoc.isFileID())
-    return true; // No other macro expansions, this is the first.
+  if (expansionLoc.isFileID()) {
+    // No other macro expansions, this is the first.
+    if (MacroBegin)
+      *MacroBegin = expansionLoc;
+    return true;
+  }
 
-  return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts);
+  return isAtStartOfMacroExpansion(expansionLoc, SM, LangOpts, MacroBegin);
 }
 
 /// \brief Returns true if the given MacroID location points at the last
 /// token of the macro expansion.
 bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
-                                        const SourceManager &SM,
-                                        const LangOptions &LangOpts) {
+                                    const SourceManager &SM,
+                                    const LangOptions &LangOpts,
+                                    SourceLocation *MacroEnd) {
   assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
 
   SourceLocation spellLoc = SM.getSpellingLoc(loc);
@@ -777,10 +783,192 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
 
   SourceLocation expansionLoc =
     SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();
-  if (expansionLoc.isFileID())
-    return true; // No other macro expansions.
+  if (expansionLoc.isFileID()) {
+    // No other macro expansions.
+    if (MacroEnd)
+      *MacroEnd = expansionLoc;
+    return true;
+  }
+
+  return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts, MacroEnd);
+}
+
+static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
+                                             const SourceManager &SM,
+                                             const LangOptions &LangOpts) {
+  SourceLocation Begin = Range.getBegin();
+  SourceLocation End = Range.getEnd();
+  assert(Begin.isFileID() && End.isFileID());
+  if (Range.isTokenRange()) {
+    End = Lexer::getLocForEndOfToken(End, 0, SM,LangOpts);
+    if (End.isInvalid())
+      return CharSourceRange();
+  }
+
+  // Break down the source locations.
+  FileID FID;
+  unsigned BeginOffs;
+  llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
+  if (FID.isInvalid())
+    return CharSourceRange();
+
+  unsigned EndOffs;
+  if (!SM.isInFileID(End, FID, &EndOffs) ||
+      BeginOffs > EndOffs)
+    return CharSourceRange();
+
+  return CharSourceRange::getCharRange(Begin, End);
+}
+
+/// \brief Accepts a range and returns a character range with file locations.
+///
+/// Returns a null range if a part of the range resides inside a macro
+/// expansion or the range does not reside on the same FileID.
+CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
+                                         const SourceManager &SM,
+                                         const LangOptions &LangOpts) {
+  SourceLocation Begin = Range.getBegin();
+  SourceLocation End = Range.getEnd();
+  if (Begin.isInvalid() || End.isInvalid())
+    return CharSourceRange();
+
+  if (Begin.isFileID() && End.isFileID())
+    return makeRangeFromFileLocs(Range, SM, LangOpts);
+
+  if (Begin.isMacroID() && End.isFileID()) {
+    if (!isAtStartOfMacroExpansion(Begin, SM, LangOpts, &Begin))
+      return CharSourceRange();
+    Range.setBegin(Begin);
+    return makeRangeFromFileLocs(Range, SM, LangOpts);
+  }
+
+  if (Begin.isFileID() && End.isMacroID()) {
+    if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
+                                                          &End)) ||
+        (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
+                                                           &End)))
+      return CharSourceRange();
+    Range.setEnd(End);
+    return makeRangeFromFileLocs(Range, SM, LangOpts);
+  }
+
+  assert(Begin.isMacroID() && End.isMacroID());
+  SourceLocation MacroBegin, MacroEnd;
+  if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
+      ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
+                                                        &MacroEnd)) ||
+       (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
+                                                         &MacroEnd)))) {
+    Range.setBegin(MacroBegin);
+    Range.setEnd(MacroEnd);
+    return makeRangeFromFileLocs(Range, SM, LangOpts);
+  }
 
-  return isAtEndOfMacroExpansion(expansionLoc, SM, LangOpts);
+  FileID FID;
+  unsigned BeginOffs;
+  llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
+  if (FID.isInvalid())
+    return CharSourceRange();
+
+  unsigned EndOffs;
+  if (!SM.isInFileID(End, FID, &EndOffs) ||
+      BeginOffs > EndOffs)
+    return CharSourceRange();
+
+  const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
+  const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
+  if (Expansion.isMacroArgExpansion() &&
+      Expansion.getSpellingLoc().isFileID()) {
+    SourceLocation SpellLoc = Expansion.getSpellingLoc();
+    Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs));
+    Range.setEnd(SpellLoc.getLocWithOffset(EndOffs));
+    return makeRangeFromFileLocs(Range, SM, LangOpts);
+  }
+
+  return CharSourceRange();
+}
+
+StringRef Lexer::getSourceText(CharSourceRange Range,
+                               const SourceManager &SM,
+                               const LangOptions &LangOpts,
+                               bool *Invalid) {
+  Range = makeFileCharRange(Range, SM, LangOpts);
+  if (Range.isInvalid()) {
+    if (Invalid) *Invalid = true;
+    return StringRef();
+  }
+
+  // Break down the source location.
+  std::pair<FileID, unsigned> beginInfo = SM.getDecomposedLoc(Range.getBegin());
+  if (beginInfo.first.isInvalid()) {
+    if (Invalid) *Invalid = true;
+    return StringRef();
+  }
+
+  unsigned EndOffs;
+  if (!SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
+      beginInfo.second > EndOffs) {
+    if (Invalid) *Invalid = true;
+    return StringRef();
+  }
+
+  // Try to the load the file buffer.
+  bool invalidTemp = false;
+  StringRef file = SM.getBufferData(beginInfo.first, &invalidTemp);
+  if (invalidTemp) {
+    if (Invalid) *Invalid = true;
+    return StringRef();
+  }
+
+  if (Invalid) *Invalid = false;
+  return file.substr(beginInfo.second, EndOffs - beginInfo.second);
+}
+
+StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
+                                       const SourceManager &SM,
+                                       const LangOptions &LangOpts) {
+  assert(Loc.isMacroID() && "Only reasonble to call this on macros");
+
+  // Find the location of the immediate macro expansion.
+  while (1) {
+    FileID FID = SM.getFileID(Loc);
+    const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
+    const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
+    Loc = Expansion.getExpansionLocStart();
+    if (!Expansion.isMacroArgExpansion())
+      break;
+
+    // For macro arguments we need to check that the argument did not come
+    // from an inner macro, e.g: "MAC1( MAC2(foo) )"
+    
+    // Loc points to the argument id of the macro definition, move to the
+    // macro expansion.
+    Loc = SM.getImmediateExpansionRange(Loc).first;
+    SourceLocation SpellLoc = Expansion.getSpellingLoc();
+    if (SpellLoc.isFileID())
+      break; // No inner macro.
+
+    // If spelling location resides in the same FileID as macro expansion
+    // location, it means there is no inner macro.
+    FileID MacroFID = SM.getFileID(Loc);
+    if (SM.isInFileID(SpellLoc, MacroFID))
+      break;
+
+    // Argument came from inner macro.
+    Loc = SpellLoc;
+  }
+
+  // Find the spelling location of the start of the non-argument expansion
+  // range. This is where the macro name was spelled in order to begin
+  // expanding this macro.
+  Loc = SM.getSpellingLoc(Loc);
+
+  // Dig out the buffer where the macro name was spelled and the extents of the
+  // name so that we can render it into the expansion note.
+  std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
+  unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+  StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
+  return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
 }
 
 //===----------------------------------------------------------------------===//
@@ -890,6 +1078,12 @@ static void InitCharacterInfo() {
 }
 
 
+/// isIdentifierHead - Return true if this is the first character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierHead(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
+
 /// isIdentifierBody - Return true if this is the body character of an
 /// identifier, which is [a-zA-Z0-9_].
 static inline bool isIdentifierBody(unsigned char c) {
@@ -1018,7 +1212,7 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {
   char Res = GetTrigraphCharForLetter(*CP);
   if (!Res || !L) return Res;
 
-  if (!L->getFeatures().Trigraphs) {
+  if (!L->getLangOpts().Trigraphs) {
     if (!L->isLexingRawMode())
       L->Diag(CP-2, diag::trigraph_ignored);
     return 0;
@@ -1085,9 +1279,8 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
                                         const LangOptions &LangOpts,
                                         bool SkipTrailingWhitespaceAndNewLine) {
   if (Loc.isMacroID()) {
-    if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts))
+    if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
       return SourceLocation();
-    Loc = SM.getExpansionRange(Loc).second;
   }
   Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
 
@@ -1169,6 +1362,13 @@ Slash:
       // Found backslash<whitespace><newline>.  Parse the char after it.
       Size += EscapedNewLineSize;
       Ptr  += EscapedNewLineSize;
+
+      // If the char that we finally got was a \n, then we must have had
+      // something like \<newline><newline>.  We don't want to consume the
+      // second newline.
+      if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
+        return ' ';
+
       // Use slow version to accumulate a correct size field.
       return getCharAndSizeSlow(Ptr, Size, Tok);
     }
@@ -1205,7 +1405,7 @@ Slash:
 /// NOTE: When this method is updated, getCharAndSizeSlow (above) should
 /// be updated to match.
 char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
-                                     const LangOptions &Features) {
+                                     const LangOptions &LangOpts) {
   // If we have a slash, look for an escaped newline.
   if (Ptr[0] == '\\') {
     ++Size;
@@ -1220,8 +1420,14 @@ Slash:
       Size += EscapedNewLineSize;
       Ptr  += EscapedNewLineSize;
 
+      // If the char that we finally got was a \n, then we must have had
+      // something like \<newline><newline>.  We don't want to consume the
+      // second newline.
+      if (*Ptr == '\n' || *Ptr == '\r' || *Ptr == '\0')
+        return ' ';
+
       // Use slow version to accumulate a correct size field.
-      return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+      return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
     }
 
     // Otherwise, this is not an escaped newline, just return the slash.
@@ -1229,7 +1435,7 @@ Slash:
   }
 
   // If this is a trigraph, process it.
-  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+  if (LangOpts.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
     // If this is actually a legal trigraph (not something like "??x"), return
     // it.
     if (char C = GetTrigraphCharForLetter(Ptr[2])) {
@@ -1272,7 +1478,7 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
   //
   // TODO: Could merge these checks into a CharInfo flag to make the comparison
   // cheaper
-  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+  if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) {
 FinishIdentifier:
     const char *IdStart = BufferPtr;
     FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
@@ -1301,7 +1507,7 @@ FinishIdentifier:
   while (1) {
     if (C == '$') {
       // If we hit a $ and they are not supported in identifiers, we are done.
-      if (!Features.DollarIdents) goto FinishIdentifier;
+      if (!LangOpts.DollarIdents) goto FinishIdentifier;
 
       // Otherwise, emit a diagnostic and continue.
       if (!isLexingRawMode())
@@ -1327,12 +1533,12 @@ FinishIdentifier:
 
 /// isHexaLiteral - Return true if Start points to a hex constant.
 /// in microsoft mode (where this is supposed to be several different tokens).
-static bool isHexaLiteral(const char *Start, const LangOptions &Features) {
+static bool isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
   unsigned Size;
-  char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, Features);
+  char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
   if (C1 != '0')
     return false;
-  char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, Features);
+  char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
   return (C2 == 'x' || C2 == 'X');
 }
 
@@ -1343,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   char PrevCh = 0;
-  while (isNumberBody(C)) { // FIXME: UCNs?
+  while (isNumberBody(C)) { // FIXME: UCNs.
     CurPtr = ConsumeChar(CurPtr, Size, Result);
     PrevCh = C;
     C = getCharAndSize(CurPtr, Size);
@@ -1353,7 +1559,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
     // If we are in Microsoft mode, don't continue if the constant is hex.
     // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
-    if (!Features.MicrosoftExt || !isHexaLiteral(BufferPtr, Features))
+    if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
       return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
   }
 
@@ -1367,6 +1573,46 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   Result.setLiteralData(TokStart);
 }
 
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11, or warn on a ud-suffix in C++98.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
+  assert(getLangOpts().CPlusPlus);
+
+  // Maximally munch an identifier. FIXME: UCNs.
+  unsigned Size;
+  char C = getCharAndSize(CurPtr, Size);
+  if (isIdentifierHead(C)) {
+    if (!getLangOpts().CPlusPlus0x) {
+      if (!isLexingRawMode())
+        Diag(CurPtr,
+             C == '_' ? diag::warn_cxx11_compat_user_defined_literal
+                      : diag::warn_cxx11_compat_reserved_user_defined_literal)
+          << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+      return CurPtr;
+    }
+
+    // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
+    // that does not start with an underscore is ill-formed. As a conforming
+    // extension, we treat all such suffixes as if they had whitespace before
+    // them.
+    if (C != '_') {
+      if (!isLexingRawMode())
+        Diag(CurPtr, getLangOpts().MicrosoftMode ? 
+            diag::ext_ms_reserved_user_defined_literal :
+            diag::ext_reserved_user_defined_literal)
+          << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+      return CurPtr;
+    }
+
+    Result.setFlag(Token::HasUDSuffix);
+    do {
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+    } while (isIdentifierBody(C));
+  }
+  return CurPtr;
+}
+
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
 /// either " or L" or u8" or u" or U".
 void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
@@ -1388,7 +1634,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
     
     if (C == '\n' || C == '\r' ||             // Newline.
         (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
-      if (!isLexingRawMode() && !Features.AsmPreprocessor)
+      if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::warn_unterminated_string);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
@@ -1406,6 +1652,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
     C = getAndAdvanceChar(CurPtr, Result);
   }
 
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getLangOpts().CPlusPlus)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
   // If a nul character existed in the string, warn about it.
   if (NulCharacter && !isLexingRawMode())
     Diag(NulCharacter, diag::null_in_string);
@@ -1485,6 +1735,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
     }
   }
 
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getLangOpts().CPlusPlus)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
   // Update the location of token as well as BufferPtr.
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, Kind);
@@ -1538,7 +1792,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 
   char C = getAndAdvanceChar(CurPtr, Result);
   if (C == '\'') {
-    if (!isLexingRawMode() && !Features.AsmPreprocessor)
+    if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
       Diag(BufferPtr, diag::err_empty_character);
     FormTokenWithChars(Result, CurPtr, tok::unknown);
     return;
@@ -1552,7 +1806,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
       C = getAndAdvanceChar(CurPtr, Result);
     } else if (C == '\n' || C == '\r' ||             // Newline.
                (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
-      if (!isLexingRawMode() && !Features.AsmPreprocessor)
+      if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::warn_unterminated_char);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
       return;
@@ -1568,6 +1822,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
     C = getAndAdvanceChar(CurPtr, Result);
   }
 
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getLangOpts().CPlusPlus)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
   // If a nul character existed in the character, warn about it.
   if (NulCharacter && !isLexingRawMode())
     Diag(NulCharacter, diag::null_in_char);
@@ -1633,12 +1891,12 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
 bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
   // If BCPL comments aren't explicitly enabled for this language, emit an
   // extension warning.
-  if (!Features.BCPLComment && !isLexingRawMode()) {
+  if (!LangOpts.BCPLComment && !isLexingRawMode()) {
     Diag(BufferPtr, diag::ext_bcpl_comment);
 
     // Mark them enabled so we only emit one warning for this translation
     // unit.
-    Features.BCPLComment = true;
+    LangOpts.BCPLComment = true;
   }
 
   // Scan over the body of the comment.  The common case, when scanning, is that
@@ -1687,14 +1945,6 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
       break;
     }
 
-    // If the char that we finally got was a \n, then we must have had something
-    // like \<newline><newline>.  We don't want to have consumed the second
-    // newline, we want CurPtr, to end up pointing to it down below.
-    if (C == '\n' || C == '\r') {
-      --CurPtr;
-      C = 'x'; // doesn't matter what this is.
-    }
-
     // If we read multiple characters, and one of those characters was a \r or
     // \n, then we had an escaped newline within the comment.  Emit diagnostic
     // unless the next line is also a // comment.
@@ -1833,7 +2083,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
 
     // If no trigraphs are enabled, warn that we ignored this trigraph and
     // ignore this * character.
-    if (!L->getFeatures().Trigraphs) {
+    if (!L->getLangOpts().Trigraphs) {
       if (!L->isLexingRawMode())
         L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
       return false;
@@ -1916,11 +2166,18 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
       if (C == '/') goto FoundSlash;
 
 #ifdef __SSE2__
-      __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
-                                     '/', '/', '/', '/', '/', '/', '/', '/');
-      while (CurPtr+16 <= BufferEnd &&
-             _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
+      __m128i Slashes = _mm_set1_epi8('/');
+      while (CurPtr+16 <= BufferEnd) {
+        int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes));
+        if (cmp != 0) {
+          // Adjust the pointer to point directly after the first slash. It's
+          // not necessary to set C here, it will be overwritten at the end of
+          // the outer loop.
+          CurPtr += llvm::CountTrailingZeros_32(cmp) + 1;
+          goto FoundSlash;
+        }
         CurPtr += 16;
+      }
 #elif __ALTIVEC__
       __vector unsigned char Slashes = {
         '/', '/', '/', '/',  '/', '/', '/', '/',
@@ -1948,8 +2205,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
     while (C != '/' && C != '\0')
       C = *CurPtr++;
 
-  FoundSlash:
     if (C == '/') {
+  FoundSlash:
       if (CurPtr[-2] == '*')  // We found the final */.  We're done!
         break;
 
@@ -2119,8 +2376,9 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
   // a pedwarn.
   if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
-    Diag(BufferEnd, diag::ext_no_newline_eof)
-      << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
+    Diag(BufferEnd, LangOpts.CPlusPlus0x ? // C++11 [lex.phases] 2.2 p2
+         diag::warn_cxx98_compat_no_newline_eof : diag::ext_no_newline_eof)
+    << FixItHint::CreateInsertion(getSourceLocation(BufferEnd), "\n");
 
   BufferPtr = CurPtr;
 
@@ -2345,7 +2603,7 @@ LexNextToken:
       
   case 26:  // DOS & CP/M EOF: "^Z".
     // If we're in Microsoft extensions mode, treat this as end of file.
-    if (Features.MicrosoftExt) {
+    if (LangOpts.MicrosoftExt) {
       // Read the PP instance variable into an automatic variable, because
       // LexEndOfFile will often delete 'this'.
       Preprocessor *PPCache = PP;
@@ -2398,7 +2656,7 @@ LexNextToken:
     // If the next token is obviously a // or /* */ comment, skip it efficiently
     // too (without going through the big switch stmt).
     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
-        Features.BCPLComment && !Features.TraditionalCPP) {
+        LangOpts.BCPLComment && !LangOpts.TraditionalCPP) {
       if (SkipBCPLComment(Result, CurPtr+2))
         return; // There is a token to return.
       goto SkipIgnoredUnits;
@@ -2423,7 +2681,7 @@ LexNextToken:
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (Features.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus0x) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       // UTF-16 string literal
@@ -2475,7 +2733,7 @@ LexNextToken:
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (Features.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus0x) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       // UTF-32 string literal
@@ -2503,7 +2761,7 @@ LexNextToken:
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (Features.CPlusPlus0x) {
+    if (LangOpts.CPlusPlus0x) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       if (Char == '"')
@@ -2526,7 +2784,7 @@ LexNextToken:
                               tok::wide_string_literal);
 
     // Wide raw string literal.
-    if (Features.CPlusPlus0x && Char == 'R' &&
+    if (LangOpts.CPlusPlus0x && Char == 'R' &&
         getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
       return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -2554,7 +2812,7 @@ LexNextToken:
     return LexIdentifier(Result, CurPtr);
 
   case '$':   // $ in identifiers.
-    if (Features.DollarIdents) {
+    if (LangOpts.DollarIdents) {
       if (!isLexingRawMode())
         Diag(CurPtr-1, diag::ext_dollar_in_identifier);
       // Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -2606,7 +2864,7 @@ LexNextToken:
       MIOpt.ReadToken();
 
       return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
-    } else if (Features.CPlusPlus && Char == '*') {
+    } else if (LangOpts.CPlusPlus && Char == '*') {
       Kind = tok::periodstar;
       CurPtr += SizeTmp;
     } else if (Char == '.' &&
@@ -2655,7 +2913,7 @@ LexNextToken:
     if (Char == '-') {      // --
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::minusminus;
-    } else if (Char == '>' && Features.CPlusPlus &&
+    } else if (Char == '>' && LangOpts.CPlusPlus &&
                getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->*
       CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                            SizeTmp2, Result);
@@ -2693,9 +2951,9 @@ LexNextToken:
       // "foo".  Check to see if the character after the second slash is a '*'.
       // If so, we will lex that as a "/" instead of the start of a comment.
       // However, we never do this in -traditional-cpp mode.
-      if ((Features.BCPLComment ||
+      if ((LangOpts.BCPLComment ||
            getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') &&
-          !Features.TraditionalCPP) {
+          !LangOpts.TraditionalCPP) {
         if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
           return; // There is a token to return.
 
@@ -2724,20 +2982,20 @@ LexNextToken:
     if (Char == '=') {
       Kind = tok::percentequal;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-    } else if (Features.Digraphs && Char == '>') {
+    } else if (LangOpts.Digraphs && Char == '>') {
       Kind = tok::r_brace;                             // '%>' -> '}'
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-    } else if (Features.Digraphs && Char == ':') {
+    } else if (LangOpts.Digraphs && Char == ':') {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Char = getCharAndSize(CurPtr, SizeTmp);
       if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
         Kind = tok::hashhash;                          // '%:%:' -> '##'
         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                              SizeTmp2, Result);
-      } else if (Char == '@' && Features.MicrosoftExt) {// %:@ -> #@ -> Charize
+      } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize
         CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
         if (!isLexingRawMode())
-          Diag(BufferPtr, diag::charize_microsoft_ext);
+          Diag(BufferPtr, diag::ext_charize_microsoft);
         Kind = tok::hashat;
       } else {                                         // '%:' -> '#'
         // We parsed a # character.  If this occurs at the start of the line,
@@ -2789,7 +3047,7 @@ LexNextToken:
         // If this is '<<<<' and we're in a Perforce-style conflict marker,
         // ignore it.
         goto LexNextToken;
-      } else if (Features.CUDA && After == '<') {
+      } else if (LangOpts.CUDA && After == '<') {
         Kind = tok::lesslessless;
         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                              SizeTmp2, Result);
@@ -2800,8 +3058,8 @@ LexNextToken:
     } else if (Char == '=') {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::lessequal;
-    } else if (Features.Digraphs && Char == ':') {     // '<:' -> '['
-      if (Features.CPlusPlus0x &&
+    } else if (LangOpts.Digraphs && Char == ':') {     // '<:' -> '['
+      if (LangOpts.CPlusPlus0x &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == ':') {
         // C++0x [lex.pptoken]p3:
         //  Otherwise, if the next three characters are <:: and the subsequent
@@ -2820,7 +3078,7 @@ LexNextToken:
 
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::l_square;
-    } else if (Features.Digraphs && Char == '%') {     // '<%' -> '{'
+    } else if (LangOpts.Digraphs && Char == '%') {     // '<%' -> '{'
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::l_brace;
     } else {
@@ -2845,7 +3103,7 @@ LexNextToken:
       } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
         // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
         goto LexNextToken;
-      } else if (Features.CUDA && After == '>') {
+      } else if (LangOpts.CUDA && After == '>') {
         Kind = tok::greatergreatergreater;
         CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                              SizeTmp2, Result);
@@ -2884,10 +3142,10 @@ LexNextToken:
     break;
   case ':':
     Char = getCharAndSize(CurPtr, SizeTmp);
-    if (Features.Digraphs && Char == '>') {
+    if (LangOpts.Digraphs && Char == '>') {
       Kind = tok::r_square; // ':>' -> ']'
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-    } else if (Features.CPlusPlus && Char == ':') {
+    } else if (LangOpts.CPlusPlus && Char == ':') {
       Kind = tok::coloncolon;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else {
@@ -2918,10 +3176,10 @@ LexNextToken:
     if (Char == '#') {
       Kind = tok::hashhash;
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-    } else if (Char == '@' && Features.MicrosoftExt) {  // #@ -> Charize
+    } else if (Char == '@' && LangOpts.MicrosoftExt) {  // #@ -> Charize
       Kind = tok::hashat;
       if (!isLexingRawMode())
-        Diag(BufferPtr, diag::charize_microsoft_ext);
+        Diag(BufferPtr, diag::ext_charize_microsoft);
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else {
       // We parsed a # character.  If this occurs at the start of the line,
@@ -2954,7 +3212,7 @@ LexNextToken:
 
   case '@':
     // Objective C support.
-    if (CurPtr[-1] == '@' && Features.ObjC1)
+    if (CurPtr[-1] == '@' && LangOpts.ObjC1)
       Kind = tok::at;
     else
       Kind = tok::unknown;
author	Dimitry Andric <dim@FreeBSD.org>	2012-04-14 14:01:31 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2012-04-14 14:01:31 +0000
commit	dbe13110f59f48b4dbb7552b3ac2935acdeece7f (patch)
tree	be1815eb79b42ff482a8562b13c2dcbf0c5dcbee /lib/Lex/Lexer.cpp
parent	9da628931ebf2609493570f87824ca22402cc65f (diff)
download	src-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.tar.gz src-test2-dbe13110f59f48b4dbb7552b3ac2935acdeece7f.zip