7 files changed, 95 insertions, 77 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a91e40435cb0a..d5a46433c36a2 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -33,7 +33,7 @@
 #include <cctype>
 using namespace clang;
 
-static void InitCharacterInfo(LangOptions);
+static void InitCharacterInfo();
 
 //===----------------------------------------------------------------------===//
 // Token Class Implementation
@@ -59,7 +59,7 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
 
 void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
                       const char *BufEnd) {
-  InitCharacterInfo(Features);
+  InitCharacterInfo();
 
   BufferStart = BufStart;
   BufferPtr = BufPtr;
@@ -254,7 +254,7 @@ enum {
 
 // Statically initialize CharInfo table based on ASCII character set
 // Reference: FreeBSD 7.2 /usr/share/misc/ascii
-static unsigned char CharInfo[256] =
+static const unsigned char CharInfo[256] =
 {
 // 0 NUL         1 SOH         2 STX         3 ETX
 // 4 EOT         5 ENQ         6 ACK         7 BEL
@@ -322,7 +322,7 @@ static unsigned char CharInfo[256] =
    0           , 0           , 0           , 0
 };
 
-static void InitCharacterInfo(LangOptions Features) {
+static void InitCharacterInfo() {
   static bool isInited = false;
   if (isInited) return;
   // check the statically-initialized CharInfo table
@@ -341,10 +341,6 @@ static void InitCharacterInfo(LangOptions Features) {
   for (unsigned i = '0'; i <= '9'; ++i)
     assert(CHAR_NUMBER == CharInfo[i]);
     
-  if (Features.Microsoft)
-    // Hack to treat DOS & CP/M EOF (^Z) as horizontal whitespace.
-    CharInfo[26/*sub*/] = CHAR_HORZ_WS;  
-
   isInited = true;
 }
 
@@ -1549,6 +1545,22 @@ LexNextToken:
       return; // KeepWhitespaceMode
 
     goto LexNextToken;   // GCC isn't tail call eliminating.
+      
+  case 26:  // DOS & CP/M EOF: "^Z".
+    // If we're in Microsoft extensions mode, treat this as end of file.
+    if (Features.Microsoft) {
+      // Read the PP instance variable into an automatic variable, because
+      // LexEndOfFile will often delete 'this'.
+      Preprocessor *PPCache = PP;
+      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
+        return;   // Got a token to return.
+      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+      return PPCache->Lex(Result);
+    }
+    // If Microsoft extensions are disabled, this is just random garbage.
+    Kind = tok::unknown;
+    break;
+      
   case '\n':
   case '\r':
     // If we are inside a preprocessor directive and we see the end of line,
@@ -1599,7 +1611,7 @@ LexNextToken:
       goto SkipHorizontalWhitespace;
     }
     goto LexNextToken;   // GCC isn't tail call eliminating.
-
+      
   // C99 6.4.4.1: Integer Constants.
   // C99 6.4.4.2: Floating Constants.
   case '0': case '1': case '2': case '3': case '4':
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index ab669422b277f..9aaa82d6263ce 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -610,28 +610,14 @@ bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
   return OverflowOccurred;
 }
 
-llvm::APFloat NumericLiteralParser::
-GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) {
+llvm::APFloat::opStatus
+NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
   using llvm::APFloat;
   using llvm::StringRef;
 
-  llvm::SmallVector<char,256> floatChars;
   unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
-  for (unsigned i = 0; i != n; ++i)
-    floatChars.push_back(ThisTokBegin[i]);
-
-  floatChars.push_back('\0');
-
-  APFloat V (Format, APFloat::fcZero, false);
-  APFloat::opStatus status;
-
-  status = V.convertFromString(StringRef(&floatChars[0], n),
-                               APFloat::rmNearestTiesToEven);
-
-  if (isExact)
-    *isExact = status == APFloat::opOK;
-
-  return V;
+  return Result.convertFromString(StringRef(ThisTokBegin, n),
+                                  APFloat::rmNearestTiesToEven);
 }
 
 
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 376cce8eb321e..2f1a34c832972 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -24,12 +24,37 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
                              Preprocessor &PP) {
   assert(MI->isFunctionLike() &&
          "Can't have args for an object-like macro!");
-
-  // Allocate memory for the MacroArgs object with the lexer tokens at the end.
-  MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
-                                         NumToks*sizeof(Token));
-  // Construct the macroargs object.
-  new (Result) MacroArgs(NumToks, VarargsElided);
+  MacroArgs **ResultEnt = 0;
+  unsigned ClosestMatch = ~0U;
+  
+  // See if we have an entry with a big enough argument list to reuse on the
+  // free list.  If so, reuse it.
+  for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
+       Entry = &(*Entry)->ArgCache)
+    if ((*Entry)->NumUnexpArgTokens >= NumToks &&
+        (*Entry)->NumUnexpArgTokens < ClosestMatch) {
+      ResultEnt = Entry;
+      
+      // If we have an exact match, use it.
+      if ((*Entry)->NumUnexpArgTokens == NumToks)
+        break;
+      // Otherwise, use the best fit.
+      ClosestMatch = (*Entry)->NumUnexpArgTokens;
+    }
+  
+  MacroArgs *Result;
+  if (ResultEnt == 0) {
+    // Allocate memory for a MacroArgs object with the lexer tokens at the end.
+    Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token));
+    // Construct the MacroArgs object.
+    new (Result) MacroArgs(NumToks, VarargsElided);
+  } else {
+    Result = *ResultEnt;
+    // Unlink this node from the preprocessor's singly linked list.
+    *ResultEnt = Result->ArgCache;
+    Result->NumUnexpArgTokens = NumToks;
+    Result->VarargsElided = VarargsElided;
+  }
 
   // Copy the actual unexpanded tokens to immediately after the result ptr.
   if (NumToks)
@@ -42,10 +67,16 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
 /// destroy - Destroy and deallocate the memory for this object.
 ///
 void MacroArgs::destroy(Preprocessor &PP) {
-  // Run the dtor to deallocate the vectors.
-  this->~MacroArgs();
-  // Release the memory for the object.
-  free(this);
+  StringifiedArgs.clear();
+
+  // Don't clear PreExpArgTokens itself, just clear each entry.  Clearing the
+  // outer vector would destroy the element vectors and free their storage.
+  for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
+    PreExpArgTokens[i].clear();
+  
+  // Add this to the preprocessor's free list.
+  ArgCache = PP.MacroArgCache;
+  PP.MacroArgCache = this;
 }
 
 /// deallocate - This should only be called by the Preprocessor when managing
@@ -110,13 +141,14 @@ bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
 /// getPreExpArgument - Return the pre-expanded form of the specified
 /// argument.
 const std::vector<Token> &
-MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
-  assert(Arg < NumUnexpArgTokens && "Invalid argument number!");
+MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, 
+                             Preprocessor &PP) {
+  assert(Arg < MI->getNumArgs() && "Invalid argument number!");
 
   // If we have already computed this, return it.
-  if (PreExpArgTokens.empty())
-    PreExpArgTokens.resize(NumUnexpArgTokens);
-
+  if (PreExpArgTokens.size() < MI->getNumArgs())
+    PreExpArgTokens.resize(MI->getNumArgs());
+  
   std::vector<Token> &Result = PreExpArgTokens[Arg];
   if (!Result.empty()) return Result;
 
@@ -156,7 +188,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
                                    Preprocessor &PP, bool Charify) {
   Token Tok;
   Tok.startToken();
-  Tok.setKind(tok::string_literal);
+  Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
 
   const Token *ArgTokStart = ArgToks;
 
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index fa040c7a4d6f5..6ff4856b4e1c0 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h
@@ -82,7 +82,7 @@ public:
   /// getPreExpArgument - Return the pre-expanded form of the specified
   /// argument.
   const std::vector<Token> &
-    getPreExpArgument(unsigned Arg, Preprocessor &PP);
+    getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP);
 
   /// getStringifiedArgument - Compute, cache, and return the specified argument
   /// that has been 'stringified' as required by the # operator.
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index f5c60eb494386..9e3d283d88861 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -220,32 +220,28 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
     // Get the identifier name without trigraphs or embedded newlines.  Note
     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
     // when skipping.
-    // TODO: could do this with zero copies in the no-clean case by using
-    // strncmp below.
-    char Directive[20];
-    unsigned IdLen;
+    char DirectiveBuf[20];
+    llvm::StringRef Directive;
     if (!Tok.needsCleaning() && Tok.getLength() < 20) {
-      IdLen = Tok.getLength();
-      memcpy(Directive, RawCharData, IdLen);
-      Directive[IdLen] = 0;
+      Directive = llvm::StringRef(RawCharData, Tok.getLength());
     } else {
       std::string DirectiveStr = getSpelling(Tok);
-      IdLen = DirectiveStr.size();
+      unsigned IdLen = DirectiveStr.size();
       if (IdLen >= 20) {
         CurPPLexer->ParsingPreprocessorDirective = false;
         // Restore comment saving mode.
         if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
         continue;
       }
-      memcpy(Directive, &DirectiveStr[0], IdLen);
-      Directive[IdLen] = 0;
-      FirstChar = Directive[0];
+      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
+      Directive = llvm::StringRef(DirectiveBuf, IdLen);
     }
 
-    if (FirstChar == 'i' && Directive[1] == 'f') {
-      if ((IdLen == 2) ||   // "if"
-          (IdLen == 5 && !strcmp(Directive+2, "def")) ||   // "ifdef"
-          (IdLen == 6 && !strcmp(Directive+2, "ndef"))) {  // "ifndef"
+    if (Directive.startswith("if")) {
+      llvm::StringRef Sub = Directive.substr(2);
+      if (Sub.empty() ||   // "if"
+          Sub == "def" ||   // "ifdef"
+          Sub == "ndef") {  // "ifndef"
         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
         // bother parsing the condition.
         DiscardUntilEndOfDirective();
@@ -253,8 +249,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
                                        /*foundnonskip*/false,
                                        /*fnddelse*/false);
       }
-    } else if (FirstChar == 'e') {
-      if (IdLen == 5 && !strcmp(Directive+1, "ndif")) {  // "endif"
+    } else if (Directive[0] == 'e') {
+      llvm::StringRef Sub = Directive.substr(1);
+      if (Sub == "ndif") {  // "endif"
         CheckEndOfDirective("endif");
         PPConditionalInfo CondInfo;
         CondInfo.WasSkipping = true; // Silence bogus warning.
@@ -265,7 +262,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
         // If we popped the outermost skipping block, we're done skipping!
         if (!CondInfo.WasSkipping)
           break;
-      } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else".
+      } else if (Sub == "lse") { // "else".
         // #else directive in a skipping conditional.  If not in some other
         // skipping conditional, and if #else hasn't already been seen, enter it
         // as a non-skipping conditional.
@@ -284,7 +281,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
           CondInfo.FoundNonSkip = true;
           break;
         }
-      } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) {  // "elif".
+      } else if (Sub == "lif") {  // "elif".
         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
 
         bool ShouldEnter;
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index d4e441b2f1833..81966cb2b9181 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -446,19 +446,10 @@ void Preprocessor::EnterMainSourceFile() {
   if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
     HeaderInfo.IncrementIncludeCount(FE);
 
-  std::vector<char> PrologFile;
-  PrologFile.reserve(4080);
-
-  // FIXME: Don't make a copy.
-  PrologFile.insert(PrologFile.end(), Predefines.begin(), Predefines.end());
-
-  // Memory buffer must end with a null byte!
-  PrologFile.push_back(0);
-
-  // Now that we have emitted the predefined macros, #includes, etc into
-  // PrologFile, preprocess it to populate the initial preprocessor state.
+  // Preprocess Predefines to populate the initial preprocessor state.
   llvm::MemoryBuffer *SB =
-    llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),&PrologFile.back(),
+    llvm::MemoryBuffer::getMemBufferCopy(Predefines.data(),
+                                         Predefines.data() + Predefines.size(),
                                          "<built-in>");
   assert(SB && "Cannot fail to create predefined source buffer");
   FileID FID = SourceMgr.createFileIDForMemBuffer(SB);
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index a40bb62db46d5..5d95eb39c89e6 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -175,7 +175,7 @@ void TokenLexer::ExpandFunctionArguments() {
       // avoids some work in common cases.
       const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
       if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
-        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
+        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
       else
         ResultArgToks = ArgTok;  // Use non-preexpanded tokens.
 
@@ -414,7 +414,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
     ResultTokTmp.startToken();
 
     // Claim that the tmp token is a string_literal so that we can get the
-    // character pointer back from CreateString.
+    // character pointer back from CreateString in getLiteralData().
     ResultTokTmp.setKind(tok::string_literal);
     PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
     SourceLocation ResultTokLoc = ResultTokTmp.getLocation();