diff options
Diffstat (limited to 'source/Plugins/ExpressionParser/Go/GoLexer.cpp')
| -rw-r--r-- | source/Plugins/ExpressionParser/Go/GoLexer.cpp | 402 | 
1 files changed, 402 insertions, 0 deletions
diff --git a/source/Plugins/ExpressionParser/Go/GoLexer.cpp b/source/Plugins/ExpressionParser/Go/GoLexer.cpp new file mode 100644 index 0000000000000..6de0f5619ca8c --- /dev/null +++ b/source/Plugins/ExpressionParser/Go/GoLexer.cpp @@ -0,0 +1,402 @@ +//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include <string.h> + +#include "GoLexer.h" + +using namespace lldb_private; + +llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords; + +GoLexer::GoLexer(const char *src) : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") +{ +} + +bool +GoLexer::SkipWhitespace() +{ +    bool saw_newline = false; +    for (; m_src < m_end; ++m_src) +    { +        if (*m_src == '\n') +            saw_newline = true; +        if (*m_src == '/' && !SkipComment()) +            return saw_newline; +        else if (!IsWhitespace(*m_src)) +            return saw_newline; +    } +    return saw_newline; +} + +bool +GoLexer::SkipComment() +{ +    if (m_src[0] == '/' && m_src[1] == '/') +    { +        for (const char *c = m_src + 2; c < m_end; ++c) +        { +            if (*c == '\n') +            { +                m_src = c - 1; +                return true; +            } +        } +        return true; +    } +    else if (m_src[0] == '/' && m_src[1] == '*') +    { +        for (const char *c = m_src + 2; c < m_end; ++c) +        { +            if (c[0] == '*' && c[1] == '/') +            { +                m_src = c + 1; +                return true; +            } +        } +    } +    return false; +} + +const GoLexer::Token & +GoLexer::Lex() +{ +    bool newline = SkipWhitespace(); +    const char *start = m_src; +    m_last_token.m_type = InternalLex(newline); +    m_last_token.m_value = llvm::StringRef(start, m_src - start); +    return m_last_token; +} + +GoLexer::TokenType +GoLexer::InternalLex(bool newline) +{ +    if (m_src >= m_end) +    { +        return TOK_EOF; +    } +    if (newline) +    { +        switch (m_last_token.m_type) +        { +            case TOK_IDENTIFIER: +            case LIT_FLOAT: +            case LIT_IMAGINARY: +            case LIT_INTEGER: +            case LIT_RUNE: +            case LIT_STRING: +            case KEYWORD_BREAK: +            case KEYWORD_CONTINUE: +            case KEYWORD_FALLTHROUGH: +            case KEYWORD_RETURN: +            case OP_PLUS_PLUS: +            case OP_MINUS_MINUS: +            case OP_RPAREN: +            case OP_RBRACK: +            case OP_RBRACE: +                return OP_SEMICOLON; +            default: +                break; +        } +    } +    char c = *m_src; +    switch (c) +    { +        case '0': +        case '1': +        case '2': +        case '3': +        case '4': +        case '5': +        case '6': +        case '7': +        case '8': +        case '9': +            return DoNumber(); +        case '+': +        case '-': +        case '*': +        case '/': +        case '%': +        case '&': +        case '|': +        case '^': +        case '<': +        case '>': +        case '!': +        case ':': +        case ';': +        case '(': +        case ')': +        case '[': +        case ']': +        case '{': +        case '}': +        case ',': +        case '=': +            return DoOperator(); +        case '.': +            if (IsDecimal(m_src[1])) +                return DoNumber(); +            return DoOperator(); +        case '$': +            // For lldb persistent vars. +            return DoIdent(); +        case '"': +        case '`': +            return DoString(); +        case '\'': +            return DoRune(); +        default: +            break; +    } +    if (IsLetterOrDigit(c)) +        return DoIdent(); +    ++m_src; +    return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::DoOperator() +{ +    TokenType t = TOK_INVALID; +    if (m_end - m_src > 2) +    { +        t = LookupKeyword(llvm::StringRef(m_src, 3)); +        if (t != TOK_INVALID) +            m_src += 3; +    } +    if (t == TOK_INVALID && m_end - m_src > 1) +    { +        t = LookupKeyword(llvm::StringRef(m_src, 2)); +        if (t != TOK_INVALID) +            m_src += 2; +    } +    if (t == TOK_INVALID) +    { +        t = LookupKeyword(llvm::StringRef(m_src, 1)); +        ++m_src; +    } +    return t; +} + +GoLexer::TokenType +GoLexer::DoIdent() +{ +    const char *start = m_src++; +    while (m_src < m_end && IsLetterOrDigit(*m_src)) +    { +        ++m_src; +    } +    TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start)); +    if (kw != TOK_INVALID) +        return kw; +    return TOK_IDENTIFIER; +} + +GoLexer::TokenType +GoLexer::DoNumber() +{ +    if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X')) +    { +        m_src += 2; +        while (IsHexChar(*m_src)) +            ++m_src; +        return LIT_INTEGER; +    } +    bool dot_ok = true; +    bool e_ok = true; +    while (true) +    { +        while (IsDecimal(*m_src)) +            ++m_src; +        switch (*m_src) +        { +            case 'i': +                ++m_src; +                return LIT_IMAGINARY; +            case '.': +                if (!dot_ok) +                    return LIT_FLOAT; +                ++m_src; +                dot_ok = false; +                break; +            case 'e': +            case 'E': +                if (!e_ok) +                    return LIT_FLOAT; +                dot_ok = e_ok = false; +                ++m_src; +                if (*m_src == '+' || *m_src == '-') +                    ++m_src; +                break; +            default: +                if (dot_ok) +                    return LIT_INTEGER; +                return LIT_FLOAT; +        } +    } +} + +GoLexer::TokenType +GoLexer::DoRune() +{ +    while (++m_src < m_end) +    { +        switch (*m_src) +        { +            case '\'': +                ++m_src; +                return LIT_RUNE; +            case '\n': +                return TOK_INVALID; +            case '\\': +                if (m_src[1] == '\n') +                    return TOK_INVALID; +                ++m_src; +        } +    } +    return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::DoString() +{ +    if (*m_src == '`') +    { +        while (++m_src < m_end) +        { +            if (*m_src == '`') +            { +                ++m_src; +                return LIT_STRING; +            } +        } +        return TOK_INVALID; +    } +    while (++m_src < m_end) +    { +        switch (*m_src) +        { +            case '"': +                ++m_src; +                return LIT_STRING; +            case '\n': +                return TOK_INVALID; +            case '\\': +                if (m_src[1] == '\n') +                    return TOK_INVALID; +                ++m_src; +        } +    } +    return TOK_INVALID; +} + +GoLexer::TokenType +GoLexer::LookupKeyword(llvm::StringRef id) +{ +    if (m_keywords == nullptr) +        m_keywords = InitKeywords(); +    const auto &it = m_keywords->find(id); +    if (it == m_keywords->end()) +        return TOK_INVALID; +    return it->second; +} + +llvm::StringRef +GoLexer::LookupToken(TokenType t) +{ +    if (m_keywords == nullptr) +        m_keywords = InitKeywords(); +    for (const auto &e : *m_keywords) +    { +        if (e.getValue() == t) +            return e.getKey(); +    } +    return ""; +} + +llvm::StringMap<GoLexer::TokenType> * +GoLexer::InitKeywords() +{ +    auto &result = *new llvm::StringMap<TokenType>(128); +    result["break"] = KEYWORD_BREAK; +    result["default"] = KEYWORD_DEFAULT; +    result["func"] = KEYWORD_FUNC; +    result["interface"] = KEYWORD_INTERFACE; +    result["select"] = KEYWORD_SELECT; +    result["case"] = KEYWORD_CASE; +    result["defer"] = KEYWORD_DEFER; +    result["go"] = KEYWORD_GO; +    result["map"] = KEYWORD_MAP; +    result["struct"] = KEYWORD_STRUCT; +    result["chan"] = KEYWORD_CHAN; +    result["else"] = KEYWORD_ELSE; +    result["goto"] = KEYWORD_GOTO; +    result["package"] = KEYWORD_PACKAGE; +    result["switch"] = KEYWORD_SWITCH; +    result["const"] = KEYWORD_CONST; +    result["fallthrough"] = KEYWORD_FALLTHROUGH; +    result["if"] = KEYWORD_IF; +    result["range"] = KEYWORD_RANGE; +    result["type"] = KEYWORD_TYPE; +    result["continue"] = KEYWORD_CONTINUE; +    result["for"] = KEYWORD_FOR; +    result["import"] = KEYWORD_IMPORT; +    result["return"] = KEYWORD_RETURN; +    result["var"] = KEYWORD_VAR; +    result["+"] = OP_PLUS; +    result["-"] = OP_MINUS; +    result["*"] = OP_STAR; +    result["/"] = OP_SLASH; +    result["%"] = OP_PERCENT; +    result["&"] = OP_AMP; +    result["|"] = OP_PIPE; +    result["^"] = OP_CARET; +    result["<<"] = OP_LSHIFT; +    result[">>"] = OP_RSHIFT; +    result["&^"] = OP_AMP_CARET; +    result["+="] = OP_PLUS_EQ; +    result["-="] = OP_MINUS_EQ; +    result["*="] = OP_STAR_EQ; +    result["/="] = OP_SLASH_EQ; +    result["%="] = OP_PERCENT_EQ; +    result["&="] = OP_AMP_EQ; +    result["|="] = OP_PIPE_EQ; +    result["^="] = OP_CARET_EQ; +    result["<<="] = OP_LSHIFT_EQ; +    result[">>="] = OP_RSHIFT_EQ; +    result["&^="] = OP_AMP_CARET_EQ; +    result["&&"] = OP_AMP_AMP; +    result["||"] = OP_PIPE_PIPE; +    result["<-"] = OP_LT_MINUS; +    result["++"] = OP_PLUS_PLUS; +    result["--"] = OP_MINUS_MINUS; +    result["=="] = OP_EQ_EQ; +    result["<"] = OP_LT; +    result[">"] = OP_GT; +    result["="] = OP_EQ; +    result["!"] = OP_BANG; +    result["!="] = OP_BANG_EQ; +    result["<="] = OP_LT_EQ; +    result[">="] = OP_GT_EQ; +    result[":="] = OP_COLON_EQ; +    result["..."] = OP_DOTS; +    result["("] = OP_LPAREN; +    result["["] = OP_LBRACK; +    result["{"] = OP_LBRACE; +    result[","] = OP_COMMA; +    result["."] = OP_DOT; +    result[")"] = OP_RPAREN; +    result["]"] = OP_RBRACK; +    result["}"] = OP_RBRACE; +    result[";"] = OP_SEMICOLON; +    result[":"] = OP_COLON; +    return &result; +}  | 
