diff options
Diffstat (limited to 'lib/Lex/LiteralSupport.cpp')
| -rw-r--r-- | lib/Lex/LiteralSupport.cpp | 44 | 
1 files changed, 37 insertions, 7 deletions
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index b8fd3ce9e9ff..fb543d0f03b3 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,  static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,                               char *&ResultBuf, bool &HadError,                               SourceLocation Loc, Preprocessor &PP, +                             bool wide,                               bool Complain) {    // FIXME: Add a warning - UCN's are only valid in C++ & C99.    // FIXME: Handle wide strings. @@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,    UTF32 UcnVal = 0;    unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); +  unsigned short UcnLenSave = UcnLen;    for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {      int CharVal = HexDigitValue(ThisTokBuf[0]);      if (CharVal == -1) break; @@ -214,6 +216,17 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,      HadError = 1;      return;    } +  if (wide) { +    (void)UcnLenSave; +    assert(UcnLenSave == 4 &&  +           "ProcessUCNEscape - only ucn length of 4 supported"); +    // little endian assumed. +    *ResultBuf++ = (UcnVal & 0x000000FF); +    *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; +    *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; +    *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; +    return; +  }    // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.    // The conversion below was inspired by:    //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c @@ -323,7 +336,7 @@ NumericLiteralParser(const char *begin, const char *end,        // Done.      } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), -              diag::err_invalid_decimal_digit) << std::string(s, s+1); +              diag::err_invalid_decimal_digit) << llvm::StringRef(s, 1);        hadError = true;        return;      } else if (*s == '.') { @@ -439,7 +452,7 @@ NumericLiteralParser(const char *begin, const char *end,      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),              isFPConstant ? diag::err_invalid_suffix_float_constant :                             diag::err_invalid_suffix_integer_constant) -      << std::string(SuffixBegin, ThisTokEnd); +      << llvm::StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);      hadError = true;      return;    } @@ -510,7 +523,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {        // Done.      } else if (isxdigit(*s)) {        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), -              diag::err_invalid_binary_digit) << std::string(s, s+1); +              diag::err_invalid_binary_digit) << llvm::StringRef(s, 1);        hadError = true;      }      // Other suffixes will be diagnosed by the caller. @@ -540,7 +553,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {    // the code is using an incorrect base.    if (isxdigit(*s) && *s != 'e' && *s != 'E') {      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), -            diag::err_invalid_octal_digit) << std::string(s, s+1); +            diag::err_invalid_octal_digit) << llvm::StringRef(s, 1);      hadError = true;      return;    } @@ -830,12 +843,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,      }      const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote. - +    bool wide = false;      // TODO: Input character set mapping support.      // Skip L marker for wide strings. -    if (ThisTokBuf[0] == 'L') +    if (ThisTokBuf[0] == 'L') { +      wide = true;        ++ThisTokBuf; +    }      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");      ++ThisTokBuf; @@ -880,7 +895,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,        // Is this a Universal Character Name escape?        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {          ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, -                         hadError, StringToks[i].getLocation(), PP, Complain); +                         hadError, StringToks[i].getLocation(), PP, wide,  +                         Complain);          continue;        }        // Otherwise, this is a non-UCN escape character.  Process it. @@ -911,6 +927,20 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,        hadError = 1;        return;      } +  } else if (Complain) { +    // Complain if this string literal has too many characters. +    unsigned MaxChars = PP.getLangOptions().CPlusPlus? 65536 +                      : PP.getLangOptions().C99 ? 4095 +                      : 509; +     +    if (GetNumStringChars() > MaxChars) +      PP.Diag(StringToks[0].getLocation(), diag::ext_string_too_long) +        << GetNumStringChars() << MaxChars +        << (PP.getLangOptions().CPlusPlus? 2 +            : PP.getLangOptions().C99 ? 1 +            : 0) +        << SourceRange(StringToks[0].getLocation(), +                       StringToks[NumStringToks-1].getLocation());    }  }  | 
