summaryrefslogtreecommitdiff
path: root/include/clang/Lex/Lexer.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/clang/Lex/Lexer.h')
-rw-r--r--include/clang/Lex/Lexer.h50
1 files changed, 42 insertions, 8 deletions
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 407b644fd74c..cb4f57fb9cf4 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -14,11 +14,11 @@
#ifndef LLVM_CLANG_LEXER_H
#define LLVM_CLANG_LEXER_H
-#include "clang/Lex/PreprocessorLexer.h"
#include "clang/Basic/LangOptions.h"
+#include "clang/Lex/PreprocessorLexer.h"
#include "llvm/ADT/SmallVector.h"
-#include <string>
#include <cassert>
+#include <string>
namespace clang {
class DiagnosticsEngine;
@@ -174,8 +174,8 @@ public:
/// SetKeepWhitespaceMode - This method lets clients enable or disable
/// whitespace retention mode.
void SetKeepWhitespaceMode(bool Val) {
- assert((!Val || LexingRawMode) &&
- "Can only enable whitespace retention in raw mode");
+ assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
+ "Can only retain whitespace in raw mode or -traditional-cpp");
ExtendedTokenMode = Val ? 2 : 0;
}
@@ -194,6 +194,14 @@ public:
ExtendedTokenMode = Mode ? 1 : 0;
}
+ /// Sets the extended token mode back to its initial value, according to the
+ /// language options and preprocessor. This controls whether the lexer
+ /// produces comment and whitespace tokens.
+ ///
+ /// This requires the lexer to have an associated preprocessor. A standalone
+ /// lexer has nothing to reset to.
+ void resetExtendedTokenMode();
+
const char *getBufferStart() const { return BufferStart; }
/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
@@ -260,10 +268,10 @@ public:
/// location and does not jump to the expansion or spelling
/// location.
static StringRef getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
- const SourceManager &SourceMgr,
- const LangOptions &LangOpts,
- bool *invalid = 0);
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SourceMgr,
+ const LangOptions &LangOpts,
+ bool *invalid = 0);
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
@@ -273,6 +281,12 @@ public:
const SourceManager &SM,
const LangOptions &LangOpts);
+ /// \brief Relex the token at the specified location.
+ /// \returns true if there was a failure, false on success.
+ static bool getRawToken(SourceLocation Loc, Token &Result,
+ const SourceManager &SM,
+ const LangOptions &LangOpts);
+
/// \brief Given a location any where in a source buffer, find the location
/// that corresponds to the beginning of the token in which the original
/// source location lands.
@@ -431,6 +445,11 @@ private:
///
void LexTokenInternal(Token &Result);
+ /// Given that a token begins with the Unicode character \p C, figure out
+ /// what kind of token it is and dispatch to the appropriate lexing helper
+ /// function.
+ void LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
+
/// FormTokenWithChars - When we lex a token, we have identified a span
/// starting at BufferPtr, going to TokEnd that forms the token. This method
/// takes that range and assigns it to the token as its location and size. In
@@ -573,6 +592,21 @@ private:
void cutOffLexing() { BufferPtr = BufferEnd; }
bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
+
+
+ /// Read a universal character name.
+ ///
+ /// \param CurPtr The position in the source buffer after the initial '\'.
+ /// If the UCN is syntactically well-formed (but not necessarily
+ /// valid), this parameter will be updated to point to the
+ /// character after the UCN.
+ /// \param SlashLoc The position in the source buffer of the '\'.
+ /// \param Tok The token being formed. Pass \c NULL to suppress diagnostics
+ /// and handle token formation in the caller.
+ ///
+ /// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
+ /// invalid.
+ uint32_t tryReadUCN(const char *&CurPtr, const char *SlashLoc, Token *Tok);
};