diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:54:40 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:54:40 +0000 |
commit | 3d1dcd9bfdb15c49ee34d576a065079ac5c4d29f (patch) | |
tree | 0bbe07708f7571f8b5291f6d7b96c102b7c99dee /lib/Lex/LiteralSupport.cpp | |
parent | a0482fa4e7fa27b01184f938097f0666b78016dd (diff) | |
download | src-3d1dcd9bfdb15c49ee34d576a065079ac5c4d29f.tar.gz src-3d1dcd9bfdb15c49ee34d576a065079ac5c4d29f.zip |
Notes
Diffstat (limited to 'lib/Lex/LiteralSupport.cpp')
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 44 |
1 files changed, 37 insertions, 7 deletions
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index b8fd3ce9e9ff..fb543d0f03b3 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf, static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, SourceLocation Loc, Preprocessor &PP, + bool wide, bool Complain) { // FIXME: Add a warning - UCN's are only valid in C++ & C99. // FIXME: Handle wide strings. @@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, UTF32 UcnVal = 0; unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); + unsigned short UcnLenSave = UcnLen; for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) { int CharVal = HexDigitValue(ThisTokBuf[0]); if (CharVal == -1) break; @@ -214,6 +216,17 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, HadError = 1; return; } + if (wide) { + (void)UcnLenSave; + assert(UcnLenSave == 4 && + "ProcessUCNEscape - only ucn length of 4 supported"); + // little endian assumed. + *ResultBuf++ = (UcnVal & 0x000000FF); + *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8; + *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16; + *ResultBuf++ = (UcnVal & 0xFF000000) >> 24; + return; + } // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. // The conversion below was inspired by: // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c @@ -323,7 +336,7 @@ NumericLiteralParser(const char *begin, const char *end, // Done. } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), - diag::err_invalid_decimal_digit) << std::string(s, s+1); + diag::err_invalid_decimal_digit) << llvm::StringRef(s, 1); hadError = true; return; } else if (*s == '.') { @@ -439,7 +452,7 @@ NumericLiteralParser(const char *begin, const char *end, PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin), isFPConstant ? diag::err_invalid_suffix_float_constant : diag::err_invalid_suffix_integer_constant) - << std::string(SuffixBegin, ThisTokEnd); + << llvm::StringRef(SuffixBegin, ThisTokEnd-SuffixBegin); hadError = true; return; } @@ -510,7 +523,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // Done. } else if (isxdigit(*s)) { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_binary_digit) << std::string(s, s+1); + diag::err_invalid_binary_digit) << llvm::StringRef(s, 1); hadError = true; } // Other suffixes will be diagnosed by the caller. @@ -540,7 +553,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // the code is using an incorrect base. if (isxdigit(*s) && *s != 'e' && *s != 'E') { PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_octal_digit) << std::string(s, s+1); + diag::err_invalid_octal_digit) << llvm::StringRef(s, 1); hadError = true; return; } @@ -830,12 +843,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, } const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - + bool wide = false; // TODO: Input character set mapping support. // Skip L marker for wide strings. - if (ThisTokBuf[0] == 'L') + if (ThisTokBuf[0] == 'L') { + wide = true; ++ThisTokBuf; + } assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); ++ThisTokBuf; @@ -880,7 +895,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, // Is this a Universal Character Name escape? if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, - hadError, StringToks[i].getLocation(), PP, Complain); + hadError, StringToks[i].getLocation(), PP, wide, + Complain); continue; } // Otherwise, this is a non-UCN escape character. Process it. @@ -911,6 +927,20 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, hadError = 1; return; } + } else if (Complain) { + // Complain if this string literal has too many characters. + unsigned MaxChars = PP.getLangOptions().CPlusPlus? 65536 + : PP.getLangOptions().C99 ? 4095 + : 509; + + if (GetNumStringChars() > MaxChars) + PP.Diag(StringToks[0].getLocation(), diag::ext_string_too_long) + << GetNumStringChars() << MaxChars + << (PP.getLangOptions().CPlusPlus? 2 + : PP.getLangOptions().C99 ? 1 + : 0) + << SourceRange(StringToks[0].getLocation(), + StringToks[NumStringToks-1].getLocation()); } } |