diff options
Diffstat (limited to 'llvm/lib/CodeGen/MIRParser')
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MILexer.cpp | 737 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MILexer.h | 236 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MIParser.cpp | 3089 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 954 |
4 files changed, 5016 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp new file mode 100644 index 000000000000..ad5c617623f2 --- /dev/null +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -0,0 +1,737 @@ +//===- MILexer.cpp - Machine instructions lexer implementation ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the lexing of machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "MILexer.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <string> + +using namespace llvm; + +namespace { + +using ErrorCallbackType = + function_ref<void(StringRef::iterator Loc, const Twine &)>; + +/// This class provides a way to iterate and get characters from the source +/// string. +class Cursor { + const char *Ptr = nullptr; + const char *End = nullptr; + +public: + Cursor(NoneType) {} + + explicit Cursor(StringRef Str) { + Ptr = Str.data(); + End = Ptr + Str.size(); + } + + bool isEOF() const { return Ptr == End; } + + char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } + + void advance(unsigned I = 1) { Ptr += I; } + + StringRef remaining() const { return StringRef(Ptr, End - Ptr); } + + StringRef upto(Cursor C) const { + assert(C.Ptr >= Ptr && C.Ptr <= End); + return StringRef(Ptr, C.Ptr - Ptr); + } + + StringRef::iterator location() const { return Ptr; } + + operator bool() const { return Ptr != nullptr; } +}; + +} // end anonymous namespace + +MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { + this->Kind = Kind; + this->Range = Range; + return *this; +} + +MIToken &MIToken::setStringValue(StringRef StrVal) { + StringValue = StrVal; + return *this; +} + +MIToken &MIToken::setOwnedStringValue(std::string StrVal) { + StringValueStorage = std::move(StrVal); + StringValue = StringValueStorage; + return *this; +} + +MIToken &MIToken::setIntegerValue(APSInt IntVal) { + this->IntVal = std::move(IntVal); + return *this; +} + +/// Skip the leading whitespace characters and return the updated cursor. +static Cursor skipWhitespace(Cursor C) { + while (isblank(C.peek())) + C.advance(); + return C; +} + +static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } + +/// Skip a line comment and return the updated cursor. +static Cursor skipComment(Cursor C) { + if (C.peek() != ';') + return C; + while (!isNewlineChar(C.peek()) && !C.isEOF()) + C.advance(); + return C; +} + +/// Return true if the given character satisfies the following regular +/// expression: [-a-zA-Z$._0-9] +static bool isIdentifierChar(char C) { + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; +} + +/// Unescapes the given string value. +/// +/// Expects the string value to be quoted. +static std::string unescapeQuotedString(StringRef Value) { + assert(Value.front() == '"' && Value.back() == '"'); + Cursor C = Cursor(Value.substr(1, Value.size() - 2)); + + std::string Str; + Str.reserve(C.remaining().size()); + while (!C.isEOF()) { + char Char = C.peek(); + if (Char == '\\') { + if (C.peek(1) == '\\') { + // Two '\' become one + Str += '\\'; + C.advance(2); + continue; + } + if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { + Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); + C.advance(3); + continue; + } + } + Str += Char; + C.advance(); + } + return Str; +} + +/// Lex a string constant using the following regular expression: \"[^\"]*\" +static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { + assert(C.peek() == '"'); + for (C.advance(); C.peek() != '"'; C.advance()) { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '\"'"); + return None; + } + } + C.advance(); + return C; +} + +static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, + unsigned PrefixLength, ErrorCallbackType ErrorCallback) { + auto Range = C; + C.advance(PrefixLength); + if (C.peek() == '"') { + if (Cursor R = lexStringConstant(C, ErrorCallback)) { + StringRef String = Range.upto(R); + Token.reset(Type, String) + .setOwnedStringValue( + unescapeQuotedString(String.drop_front(PrefixLength))); + return R; + } + Token.reset(MIToken::Error, Range.remaining()); + return Range; + } + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(Type, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(PrefixLength)); + return C; +} + +static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("_", MIToken::underscore) + .Case("implicit", MIToken::kw_implicit) + .Case("implicit-def", MIToken::kw_implicit_define) + .Case("def", MIToken::kw_def) + .Case("dead", MIToken::kw_dead) + .Case("killed", MIToken::kw_killed) + .Case("undef", MIToken::kw_undef) + .Case("internal", MIToken::kw_internal) + .Case("early-clobber", MIToken::kw_early_clobber) + .Case("debug-use", MIToken::kw_debug_use) + .Case("renamable", MIToken::kw_renamable) + .Case("tied-def", MIToken::kw_tied_def) + .Case("frame-setup", MIToken::kw_frame_setup) + .Case("frame-destroy", MIToken::kw_frame_destroy) + .Case("nnan", MIToken::kw_nnan) + .Case("ninf", MIToken::kw_ninf) + .Case("nsz", MIToken::kw_nsz) + .Case("arcp", MIToken::kw_arcp) + .Case("contract", MIToken::kw_contract) + .Case("afn", MIToken::kw_afn) + .Case("reassoc", MIToken::kw_reassoc) + .Case("nuw" , MIToken::kw_nuw) + .Case("nsw" , MIToken::kw_nsw) + .Case("exact" , MIToken::kw_exact) + .Case("fpexcept", MIToken::kw_fpexcept) + .Case("debug-location", MIToken::kw_debug_location) + .Case("same_value", MIToken::kw_cfi_same_value) + .Case("offset", MIToken::kw_cfi_offset) + .Case("rel_offset", MIToken::kw_cfi_rel_offset) + .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) + .Case("escape", MIToken::kw_cfi_escape) + .Case("def_cfa", MIToken::kw_cfi_def_cfa) + .Case("remember_state", MIToken::kw_cfi_remember_state) + .Case("restore", MIToken::kw_cfi_restore) + .Case("restore_state", MIToken::kw_cfi_restore_state) + .Case("undefined", MIToken::kw_cfi_undefined) + .Case("register", MIToken::kw_cfi_register) + .Case("window_save", MIToken::kw_cfi_window_save) + .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + .Case("blockaddress", MIToken::kw_blockaddress) + .Case("intrinsic", MIToken::kw_intrinsic) + .Case("target-index", MIToken::kw_target_index) + .Case("half", MIToken::kw_half) + .Case("float", MIToken::kw_float) + .Case("double", MIToken::kw_double) + .Case("x86_fp80", MIToken::kw_x86_fp80) + .Case("fp128", MIToken::kw_fp128) + .Case("ppc_fp128", MIToken::kw_ppc_fp128) + .Case("target-flags", MIToken::kw_target_flags) + .Case("volatile", MIToken::kw_volatile) + .Case("non-temporal", MIToken::kw_non_temporal) + .Case("dereferenceable", MIToken::kw_dereferenceable) + .Case("invariant", MIToken::kw_invariant) + .Case("align", MIToken::kw_align) + .Case("addrspace", MIToken::kw_addrspace) + .Case("stack", MIToken::kw_stack) + .Case("got", MIToken::kw_got) + .Case("jump-table", MIToken::kw_jump_table) + .Case("constant-pool", MIToken::kw_constant_pool) + .Case("call-entry", MIToken::kw_call_entry) + .Case("liveout", MIToken::kw_liveout) + .Case("address-taken", MIToken::kw_address_taken) + .Case("landing-pad", MIToken::kw_landing_pad) + .Case("liveins", MIToken::kw_liveins) + .Case("successors", MIToken::kw_successors) + .Case("floatpred", MIToken::kw_floatpred) + .Case("intpred", MIToken::kw_intpred) + .Case("shufflemask", MIToken::kw_shufflemask) + .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) + .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) + .Case("unknown-size", MIToken::kw_unknown_size) + .Default(MIToken::Identifier); +} + +static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { + if (!isalpha(C.peek()) && C.peek() != '_') + return None; + auto Range = C; + while (isIdentifierChar(C.peek())) + C.advance(); + auto Identifier = Range.upto(C); + Token.reset(getIdentifierKind(Identifier), Identifier) + .setStringValue(Identifier); + return C; +} + +static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + bool IsReference = C.remaining().startswith("%bb."); + if (!IsReference && !C.remaining().startswith("bb.")) + return None; + auto Range = C; + unsigned PrefixLength = IsReference ? 4 : 3; + C.advance(PrefixLength); // Skip '%bb.' or 'bb.' + if (!isdigit(C.peek())) { + Token.reset(MIToken::Error, C.remaining()); + ErrorCallback(C.location(), "expected a number after '%bb.'"); + return C; + } + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' + // TODO: The format bb.<id>.<irname> is supported only when it's not a + // reference. Once we deprecate the format where the irname shows up, we + // should only lex forward if it is a reference. + if (C.peek() == '.') { + C.advance(); // Skip '.' + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(IsReference ? MIToken::MachineBasicBlock + : MIToken::MachineBasicBlockLabel, + Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = Rule.size() + Number.size(); + if (C.peek() == '.') { + C.advance(); + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(Kind, Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); +} + +static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { + return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); +} + +static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); +} + +static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); +} + +static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "%subreg."; + if (!C.remaining().startswith(Rule)) + return None; + return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), + ErrorCallback); +} + +static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "%ir-block."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexIRValue(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "%ir."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRValue); + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '"') + return None; + return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, + ErrorCallback); +} + +static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { + auto Range = C; + C.advance(); // Skip '%' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::VirtualRegister, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +/// Returns true for a character allowed in a register name. +static bool isRegisterChar(char C) { + return isIdentifierChar(C) && C != '.'; +} + +static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { + Cursor Range = C; + C.advance(); // Skip '%' + while (isRegisterChar(C.peek())) + C.advance(); + Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' + return C; +} + +static Cursor maybeLexRegister(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '%' && C.peek() != '$') + return None; + + if (C.peek() == '%') { + if (isdigit(C.peek(1))) + return lexVirtualRegister(C, Token); + + if (isRegisterChar(C.peek(1))) + return lexNamedVirtualRegister(C, Token); + + return None; + } + + assert(C.peek() == '$'); + auto Range = C; + C.advance(); // Skip '$' + while (isRegisterChar(C.peek())) + C.advance(); + Token.reset(MIToken::NamedRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' + return C; +} + +static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '@') + return None; + if (!isdigit(C.peek(1))) + return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, + ErrorCallback); + auto Range = C; + C.advance(1); // Skip the '@' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::GlobalValue, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '&') + return None; + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, + ErrorCallback); +} + +static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "<mcsymbol "; + if (!C.remaining().startswith(Rule)) + return None; + auto Start = C; + C.advance(Rule.size()); + + // Try a simple unquoted name. + if (C.peek() != '"') { + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef String = Start.upto(C).drop_front(Rule.size()); + if (C.peek() != '>') { + ErrorCallback(C.location(), + "expected the '<mcsymbol ...' to be closed by a '>'"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + C.advance(); + + Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); + return C; + } + + // Otherwise lex out a quoted name. + Cursor R = lexStringConstant(C, ErrorCallback); + if (!R) { + ErrorCallback(C.location(), + "unable to parse quoted string from opening quote"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + StringRef String = Start.upto(R).drop_front(Rule.size()); + if (R.peek() != '>') { + ErrorCallback(R.location(), + "expected the '<mcsymbol ...' to be closed by a '>'"); + Token.reset(MIToken::Error, Start.remaining()); + return Start; + } + R.advance(); + + Token.reset(MIToken::MCSymbol, Start.upto(R)) + .setOwnedStringValue(unescapeQuotedString(String)); + return R; +} + +static bool isValidHexFloatingPointPrefix(char C) { + return C == 'H' || C == 'K' || C == 'L' || C == 'M'; +} + +static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { + C.advance(); + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(C.peek())) + C.advance(); + if ((C.peek() == 'e' || C.peek() == 'E') && + (isdigit(C.peek(1)) || + ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { + C.advance(2); + while (isdigit(C.peek())) + C.advance(); + } + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) + return None; + Cursor Range = C; + C.advance(2); + unsigned PrefLen = 2; + if (isValidHexFloatingPointPrefix(C.peek())) { + C.advance(); + PrefLen++; + } + while (isxdigit(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + if (StrVal.size() <= PrefLen) + return None; + if (PrefLen == 2) + Token.reset(MIToken::HexLiteral, Range.upto(C)); + else // It must be 3, which means that there was a floating-point prefix. + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { + if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) + return None; + auto Range = C; + C.advance(); + while (isdigit(C.peek())) + C.advance(); + if (C.peek() == '.') + return lexFloatingPointLiteral(Range, C, Token); + StringRef StrVal = Range.upto(C); + Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); + return C; +} + +static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("!tbaa", MIToken::md_tbaa) + .Case("!alias.scope", MIToken::md_alias_scope) + .Case("!noalias", MIToken::md_noalias) + .Case("!range", MIToken::md_range) + .Case("!DIExpression", MIToken::md_diexpr) + .Case("!DILocation", MIToken::md_dilocation) + .Default(MIToken::Error); +} + +static Cursor maybeLexExlaim(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '!') + return None; + auto Range = C; + C.advance(1); + if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { + Token.reset(MIToken::exclaim, Range.upto(C)); + return C; + } + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + Token.reset(getMetadataKeywordKind(StrVal), StrVal); + if (Token.isError()) + ErrorCallback(Token.location(), + "use of unknown metadata keyword '" + StrVal + "'"); + return C; +} + +static MIToken::TokenKind symbolToken(char C) { + switch (C) { + case ',': + return MIToken::comma; + case '.': + return MIToken::dot; + case '=': + return MIToken::equal; + case ':': + return MIToken::colon; + case '(': + return MIToken::lparen; + case ')': + return MIToken::rparen; + case '{': + return MIToken::lbrace; + case '}': + return MIToken::rbrace; + case '+': + return MIToken::plus; + case '-': + return MIToken::minus; + case '<': + return MIToken::less; + case '>': + return MIToken::greater; + default: + return MIToken::Error; + } +} + +static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { + MIToken::TokenKind Kind; + unsigned Length = 1; + if (C.peek() == ':' && C.peek(1) == ':') { + Kind = MIToken::coloncolon; + Length = 2; + } else + Kind = symbolToken(C.peek()); + if (Kind == MIToken::Error) + return None; + auto Range = C; + C.advance(Length); + Token.reset(Kind, Range.upto(C)); + return C; +} + +static Cursor maybeLexNewline(Cursor C, MIToken &Token) { + if (!isNewlineChar(C.peek())) + return None; + auto Range = C; + C.advance(); + Token.reset(MIToken::Newline, Range.upto(C)); + return C; +} + +static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '`') + return None; + auto Range = C; + C.advance(); + auto StrRange = C; + while (C.peek() != '`') { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '`'"); + Token.reset(MIToken::Error, Range.remaining()); + return C; + } + C.advance(); + } + StringRef Value = StrRange.upto(C); + C.advance(); + Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); + return C; +} + +StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, + ErrorCallbackType ErrorCallback) { + auto C = skipComment(skipWhitespace(Cursor(Source))); + if (C.isEOF()) { + Token.reset(MIToken::Eof, C.remaining()); + return C.remaining(); + } + + if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIdentifier(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexJumpTableIndex(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexFixedStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexConstantPoolItem(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNumericalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexSymbol(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNewline(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) + return R.remaining(); + + Token.reset(MIToken::Error, C.remaining()); + ErrorCallback(C.location(), + Twine("unexpected character '") + Twine(C.peek()) + "'"); + return C.remaining(); +} diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h new file mode 100644 index 000000000000..200f9d026cc8 --- /dev/null +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -0,0 +1,236 @@ +//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the function that lexes the machine instruction source +// string. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H +#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include <string> + +namespace llvm { + +class Twine; + +/// A token produced by the machine instruction lexer. +struct MIToken { + enum TokenKind { + // Markers + Eof, + Error, + Newline, + + // Tokens with no info. + comma, + equal, + underscore, + colon, + coloncolon, + dot, + exclaim, + lparen, + rparen, + lbrace, + rbrace, + plus, + minus, + less, + greater, + + // Keywords + kw_implicit, + kw_implicit_define, + kw_def, + kw_dead, + kw_dereferenceable, + kw_killed, + kw_undef, + kw_internal, + kw_early_clobber, + kw_debug_use, + kw_renamable, + kw_tied_def, + kw_frame_setup, + kw_frame_destroy, + kw_nnan, + kw_ninf, + kw_nsz, + kw_arcp, + kw_contract, + kw_afn, + kw_reassoc, + kw_nuw, + kw_nsw, + kw_exact, + kw_fpexcept, + kw_debug_location, + kw_cfi_same_value, + kw_cfi_offset, + kw_cfi_rel_offset, + kw_cfi_def_cfa_register, + kw_cfi_def_cfa_offset, + kw_cfi_adjust_cfa_offset, + kw_cfi_escape, + kw_cfi_def_cfa, + kw_cfi_register, + kw_cfi_remember_state, + kw_cfi_restore, + kw_cfi_restore_state, + kw_cfi_undefined, + kw_cfi_window_save, + kw_cfi_aarch64_negate_ra_sign_state, + kw_blockaddress, + kw_intrinsic, + kw_target_index, + kw_half, + kw_float, + kw_double, + kw_x86_fp80, + kw_fp128, + kw_ppc_fp128, + kw_target_flags, + kw_volatile, + kw_non_temporal, + kw_invariant, + kw_align, + kw_addrspace, + kw_stack, + kw_got, + kw_jump_table, + kw_constant_pool, + kw_call_entry, + kw_liveout, + kw_address_taken, + kw_landing_pad, + kw_liveins, + kw_successors, + kw_floatpred, + kw_intpred, + kw_shufflemask, + kw_pre_instr_symbol, + kw_post_instr_symbol, + kw_unknown_size, + + // Named metadata keywords + md_tbaa, + md_alias_scope, + md_noalias, + md_range, + md_diexpr, + md_dilocation, + + // Identifier tokens + Identifier, + NamedRegister, + NamedVirtualRegister, + MachineBasicBlockLabel, + MachineBasicBlock, + StackObject, + FixedStackObject, + NamedGlobalValue, + GlobalValue, + ExternalSymbol, + MCSymbol, + + // Other tokens + IntegerLiteral, + FloatingPointLiteral, + HexLiteral, + VectorLiteral, + VirtualRegister, + ConstantPoolItem, + JumpTableIndex, + NamedIRBlock, + IRBlock, + NamedIRValue, + IRValue, + QuotedIRValue, // `<constant value>` + SubRegisterIndex, + StringConstant + }; + +private: + TokenKind Kind = Error; + StringRef Range; + StringRef StringValue; + std::string StringValueStorage; + APSInt IntVal; + +public: + MIToken() = default; + + MIToken &reset(TokenKind Kind, StringRef Range); + + MIToken &setStringValue(StringRef StrVal); + MIToken &setOwnedStringValue(std::string StrVal); + MIToken &setIntegerValue(APSInt IntVal); + + TokenKind kind() const { return Kind; } + + bool isError() const { return Kind == Error; } + + bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } + + bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } + + bool isRegister() const { + return Kind == NamedRegister || Kind == underscore || + Kind == NamedVirtualRegister || Kind == VirtualRegister; + } + + bool isRegisterFlag() const { + return Kind == kw_implicit || Kind == kw_implicit_define || + Kind == kw_def || Kind == kw_dead || Kind == kw_killed || + Kind == kw_undef || Kind == kw_internal || + Kind == kw_early_clobber || Kind == kw_debug_use || + Kind == kw_renamable; + } + + bool isMemoryOperandFlag() const { + return Kind == kw_volatile || Kind == kw_non_temporal || + Kind == kw_dereferenceable || Kind == kw_invariant || + Kind == StringConstant; + } + + bool is(TokenKind K) const { return Kind == K; } + + bool isNot(TokenKind K) const { return Kind != K; } + + StringRef::iterator location() const { return Range.begin(); } + + StringRef range() const { return Range; } + + /// Return the token's string value. + StringRef stringValue() const { return StringValue; } + + const APSInt &integerValue() const { return IntVal; } + + bool hasIntegerValue() const { + return Kind == IntegerLiteral || Kind == MachineBasicBlock || + Kind == MachineBasicBlockLabel || Kind == StackObject || + Kind == FixedStackObject || Kind == GlobalValue || + Kind == VirtualRegister || Kind == ConstantPoolItem || + Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; + } +}; + +/// Consume a single machine instruction token in the given source and return +/// the remaining source string. +StringRef lexMIToken( + StringRef Source, MIToken &Token, + function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback); + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp new file mode 100644 index 000000000000..6498acc9fa51 --- /dev/null +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -0,0 +1,3089 @@ +//===- MIParser.cpp - Machine instructions parser implementation ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the parsing of machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRParser/MIParser.h" +#include "MILexer.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <string> +#include <utility> + +using namespace llvm; + +void PerTargetMIParsingState::setTarget( + const TargetSubtargetInfo &NewSubtarget) { + + // If the subtarget changed, over conservatively assume everything is invalid. + if (&Subtarget == &NewSubtarget) + return; + + Names2InstrOpCodes.clear(); + Names2Regs.clear(); + Names2RegMasks.clear(); + Names2SubRegIndices.clear(); + Names2TargetIndices.clear(); + Names2DirectTargetFlags.clear(); + Names2BitmaskTargetFlags.clear(); + Names2MMOTargetFlags.clear(); + + initNames2RegClasses(); + initNames2RegBanks(); +} + +void PerTargetMIParsingState::initNames2Regs() { + if (!Names2Regs.empty()) + return; + + // The '%noreg' register is the register 0. + Names2Regs.insert(std::make_pair("noreg", 0)); + const auto *TRI = Subtarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); + + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { + bool WasInserted = + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) + .second; + (void)WasInserted; + assert(WasInserted && "Expected registers to be unique case-insensitively"); + } +} + +bool PerTargetMIParsingState::getRegisterByName(StringRef RegName, + unsigned &Reg) { + initNames2Regs(); + auto RegInfo = Names2Regs.find(RegName); + if (RegInfo == Names2Regs.end()) + return true; + Reg = RegInfo->getValue(); + return false; +} + +void PerTargetMIParsingState::initNames2InstrOpCodes() { + if (!Names2InstrOpCodes.empty()) + return; + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) + Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); +} + +bool PerTargetMIParsingState::parseInstrName(StringRef InstrName, + unsigned &OpCode) { + initNames2InstrOpCodes(); + auto InstrInfo = Names2InstrOpCodes.find(InstrName); + if (InstrInfo == Names2InstrOpCodes.end()) + return true; + OpCode = InstrInfo->getValue(); + return false; +} + +void PerTargetMIParsingState::initNames2RegMasks() { + if (!Names2RegMasks.empty()) + return; + const auto *TRI = Subtarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); + ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); + ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); + assert(RegMasks.size() == RegMaskNames.size()); + for (size_t I = 0, E = RegMasks.size(); I < E; ++I) + Names2RegMasks.insert( + std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); +} + +const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) { + initNames2RegMasks(); + auto RegMaskInfo = Names2RegMasks.find(Identifier); + if (RegMaskInfo == Names2RegMasks.end()) + return nullptr; + return RegMaskInfo->getValue(); +} + +void PerTargetMIParsingState::initNames2SubRegIndices() { + if (!Names2SubRegIndices.empty()) + return; + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) + Names2SubRegIndices.insert( + std::make_pair(TRI->getSubRegIndexName(I), I)); +} + +unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) { + initNames2SubRegIndices(); + auto SubRegInfo = Names2SubRegIndices.find(Name); + if (SubRegInfo == Names2SubRegIndices.end()) + return 0; + return SubRegInfo->getValue(); +} + +void PerTargetMIParsingState::initNames2TargetIndices() { + if (!Names2TargetIndices.empty()) + return; + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) + Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) { + initNames2TargetIndices(); + auto IndexInfo = Names2TargetIndices.find(Name); + if (IndexInfo == Names2TargetIndices.end()) + return true; + Index = IndexInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2DirectTargetFlags() { + if (!Names2DirectTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2DirectTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name, + unsigned &Flag) { + initNames2DirectTargetFlags(); + auto FlagInfo = Names2DirectTargetFlags.find(Name); + if (FlagInfo == Names2DirectTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2BitmaskTargetFlags() { + if (!Names2BitmaskTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2BitmaskTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name, + unsigned &Flag) { + initNames2BitmaskTargetFlags(); + auto FlagInfo = Names2BitmaskTargetFlags.find(Name); + if (FlagInfo == Names2BitmaskTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + + const auto *TII = Subtarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void PerTargetMIParsingState::initNames2RegClasses() { + if (!Names2RegClasses.empty()) + return; + + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { + const auto *RC = TRI->getRegClass(I); + Names2RegClasses.insert( + std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); + } +} + +void PerTargetMIParsingState::initNames2RegBanks() { + if (!Names2RegBanks.empty()) + return; + + const RegisterBankInfo *RBI = Subtarget.getRegBankInfo(); + // If the target does not support GlobalISel, we may not have a + // register bank info. + if (!RBI) + return; + + for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) { + const auto &RegBank = RBI->getRegBank(I); + Names2RegBanks.insert( + std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank)); + } +} + +const TargetRegisterClass * +PerTargetMIParsingState::getRegClass(StringRef Name) { + auto RegClassInfo = Names2RegClasses.find(Name); + if (RegClassInfo == Names2RegClasses.end()) + return nullptr; + return RegClassInfo->getValue(); +} + +const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) { + auto RegBankInfo = Names2RegBanks.find(Name); + if (RegBankInfo == Names2RegBanks.end()) + return nullptr; + return RegBankInfo->getValue(); +} + +PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, + SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T) + : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) { +} + +VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { + auto I = VRegInfos.insert(std::make_pair(Num, nullptr)); + if (I.second) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + VRegInfo *Info = new (Allocator) VRegInfo; + Info->VReg = MRI.createIncompleteVirtualRegister(); + I.first->second = Info; + } + return *I.first->second; +} + +VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { + assert(RegName != "" && "Expected named reg."); + + auto I = VRegInfosNamed.insert(std::make_pair(RegName.str(), nullptr)); + if (I.second) { + VRegInfo *Info = new (Allocator) VRegInfo; + Info->VReg = MF.getRegInfo().createIncompleteVirtualRegister(RegName); + I.first->second = Info; + } + return *I.first->second; +} + +namespace { + +/// A wrapper struct around the 'MachineOperand' struct that includes a source +/// range and other attributes. +struct ParsedMachineOperand { + MachineOperand Operand; + StringRef::iterator Begin; + StringRef::iterator End; + Optional<unsigned> TiedDefIdx; + + ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin, + StringRef::iterator End, Optional<unsigned> &TiedDefIdx) + : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) { + if (TiedDefIdx) + assert(Operand.isReg() && Operand.isUse() && + "Only used register operands can be tied"); + } +}; + +class MIParser { + MachineFunction &MF; + SMDiagnostic &Error; + StringRef Source, CurrentSource; + MIToken Token; + PerFunctionMIParsingState &PFS; + /// Maps from slot numbers to function's unnamed basic blocks. + DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; + /// Maps from slot numbers to function's unnamed values. + DenseMap<unsigned, const Value *> Slots2Values; + +public: + MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source); + + /// \p SkipChar gives the number of characters to skip before looking + /// for the next token. + void lex(unsigned SkipChar = 0); + + /// Report an error at the current location with the given message. + /// + /// This function always return true. + bool error(const Twine &Msg); + + /// Report an error at the given location with the given message. + /// + /// This function always return true. + bool error(StringRef::iterator Loc, const Twine &Msg); + + bool + parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlocks(); + bool parse(MachineInstr *&MI); + bool parseStandaloneMBB(MachineBasicBlock *&MBB); + bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(VRegInfo *&Info); + bool parseStandaloneRegister(unsigned &Reg); + bool parseStandaloneStackObject(int &FI); + bool parseStandaloneMDNode(MDNode *&Node); + + bool + parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom); + bool parseBasicBlockLiveins(MachineBasicBlock &MBB); + bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); + + bool parseNamedRegister(unsigned &Reg); + bool parseVirtualRegister(VRegInfo *&Info); + bool parseNamedVirtualRegister(VRegInfo *&Info); + bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo); + bool parseRegisterFlag(unsigned &Flags); + bool parseRegisterClassOrBank(VRegInfo &RegInfo); + bool parseSubRegisterIndex(unsigned &SubReg); + bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, bool IsDef = false); + bool parseImmediateOperand(MachineOperand &Dest); + bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C); + bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty); + bool parseTypedImmediateOperand(MachineOperand &Dest); + bool parseFPImmediateOperand(MachineOperand &Dest); + bool parseMBBReference(MachineBasicBlock *&MBB); + bool parseMBBOperand(MachineOperand &Dest); + bool parseStackFrameIndex(int &FI); + bool parseStackObjectOperand(MachineOperand &Dest); + bool parseFixedStackFrameIndex(int &FI); + bool parseFixedStackObjectOperand(MachineOperand &Dest); + bool parseGlobalValue(GlobalValue *&GV); + bool parseGlobalAddressOperand(MachineOperand &Dest); + bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseSubRegisterIndexOperand(MachineOperand &Dest); + bool parseJumpTableIndexOperand(MachineOperand &Dest); + bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMCSymbolOperand(MachineOperand &Dest); + bool parseMDNode(MDNode *&Node); + bool parseDIExpression(MDNode *&Expr); + bool parseDILocation(MDNode *&Expr); + bool parseMetadataOperand(MachineOperand &Dest); + bool parseCFIOffset(int &Offset); + bool parseCFIRegister(unsigned &Reg); + bool parseCFIEscapeValues(std::string& Values); + bool parseCFIOperand(MachineOperand &Dest); + bool parseIRBlock(BasicBlock *&BB, const Function &F); + bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseIntrinsicOperand(MachineOperand &Dest); + bool parsePredicateOperand(MachineOperand &Dest); + bool parseShuffleMaskOperand(MachineOperand &Dest); + bool parseTargetIndexOperand(MachineOperand &Dest); + bool parseCustomRegisterMaskOperand(MachineOperand &Dest); + bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); + bool parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseOffset(int64_t &Offset); + bool parseAlignment(unsigned &Alignment); + bool parseAddrspace(unsigned &Addrspace); + bool parseOperandsOffset(MachineOperand &Op); + bool parseIRValue(const Value *&V); + bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); + bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); + bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); + bool parseOptionalAtomicOrdering(AtomicOrdering &Order); + bool parseMachineMemoryOperand(MachineMemOperand *&Dest); + bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol); + +private: + /// Convert the integer literal in the current token into an unsigned integer. + /// + /// Return true if an error occurred. + bool getUnsigned(unsigned &Result); + + /// Convert the integer literal in the current token into an uint64. + /// + /// Return true if an error occurred. + bool getUint64(uint64_t &Result); + + /// Convert the hexadecimal literal in the current token into an unsigned + /// APInt with a minimum bitwidth required to represent the value. + /// + /// Return true if the literal does not represent an integer value. + bool getHexUint(APInt &Result); + + /// If the current token is of the given kind, consume it and return false. + /// Otherwise report an error and return true. + bool expectAndConsume(MIToken::TokenKind TokenKind); + + /// If the current token is of the given kind, consume it and return true. + /// Otherwise return false. + bool consumeIfPresent(MIToken::TokenKind TokenKind); + + bool parseInstruction(unsigned &OpCode, unsigned &Flags); + + bool assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands); + + bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, + const MCInstrDesc &MCID); + + const BasicBlock *getIRBlock(unsigned Slot); + const BasicBlock *getIRBlock(unsigned Slot, const Function &F); + + const Value *getIRValue(unsigned Slot); + + /// Get or create an MCSymbol for a given name. + MCSymbol *getOrCreateMCSymbol(StringRef Name); + + /// parseStringConstant + /// ::= StringConstant + bool parseStringConstant(std::string &Result); +}; + +} // end anonymous namespace + +MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source) + : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) +{} + +void MIParser::lex(unsigned SkipChar) { + CurrentSource = lexMIToken( + CurrentSource.data() + SkipChar, Token, + [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); +} + +bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } + +bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { + const SourceMgr &SM = *PFS.SM; + assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); + const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); + if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { + // Create an ordinary diagnostic when the source manager's buffer is the + // source string. + Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); + return true; + } + // Create a diagnostic for a YAML string literal. + Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, + Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), + Source, None, None); + return true; +} + +static const char *toString(MIToken::TokenKind TokenKind) { + switch (TokenKind) { + case MIToken::comma: + return "','"; + case MIToken::equal: + return "'='"; + case MIToken::colon: + return "':'"; + case MIToken::lparen: + return "'('"; + case MIToken::rparen: + return "')'"; + default: + return "<unknown token>"; + } +} + +bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return error(Twine("expected ") + toString(TokenKind)); + lex(); + return false; +} + +bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return false; + lex(); + return true; +} + +bool MIParser::parseBasicBlockDefinition( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + assert(Token.is(MIToken::MachineBasicBlockLabel)); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + auto Loc = Token.location(); + auto Name = Token.stringValue(); + lex(); + bool HasAddressTaken = false; + bool IsLandingPad = false; + unsigned Alignment = 0; + BasicBlock *BB = nullptr; + if (consumeIfPresent(MIToken::lparen)) { + do { + // TODO: Report an error when multiple same attributes are specified. + switch (Token.kind()) { + case MIToken::kw_address_taken: + HasAddressTaken = true; + lex(); + break; + case MIToken::kw_landing_pad: + IsLandingPad = true; + lex(); + break; + case MIToken::kw_align: + if (parseAlignment(Alignment)) + return true; + break; + case MIToken::IRBlock: + // TODO: Report an error when both name and ir block are specified. + if (parseIRBlock(BB, MF.getFunction())) + return true; + lex(); + break; + default: + break; + } + } while (consumeIfPresent(MIToken::comma)); + if (expectAndConsume(MIToken::rparen)) + return true; + } + if (expectAndConsume(MIToken::colon)) + return true; + + if (!Name.empty()) { + BB = dyn_cast_or_null<BasicBlock>( + MF.getFunction().getValueSymbolTable()->lookup(Name)); + if (!BB) + return error(Loc, Twine("basic block '") + Name + + "' is not defined in the function '" + + MF.getName() + "'"); + } + auto *MBB = MF.CreateMachineBasicBlock(BB); + MF.insert(MF.end(), MBB); + bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; + if (!WasInserted) + return error(Loc, Twine("redefinition of machine basic block with id #") + + Twine(ID)); + if (Alignment) + MBB->setAlignment(Align(Alignment)); + if (HasAddressTaken) + MBB->setHasAddressTaken(); + MBB->setIsEHPad(IsLandingPad); + return false; +} + +bool MIParser::parseBasicBlockDefinitions( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + if (Token.isNot(MIToken::MachineBasicBlockLabel)) + return error("expected a basic block definition before instructions"); + unsigned BraceDepth = 0; + do { + if (parseBasicBlockDefinition(MBBSlots)) + return true; + bool IsAfterNewline = false; + // Skip until the next machine basic block. + while (true) { + if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) || + Token.isErrorOrEOF()) + break; + else if (Token.is(MIToken::MachineBasicBlockLabel)) + return error("basic block definition should be located at the start of " + "the line"); + else if (consumeIfPresent(MIToken::Newline)) { + IsAfterNewline = true; + continue; + } + IsAfterNewline = false; + if (Token.is(MIToken::lbrace)) + ++BraceDepth; + if (Token.is(MIToken::rbrace)) { + if (!BraceDepth) + return error("extraneous closing brace ('}')"); + --BraceDepth; + } + lex(); + } + // Verify that we closed all of the '{' at the end of a file or a block. + if (!Token.isError() && BraceDepth) + return error("expected '}'"); // FIXME: Report a note that shows '{'. + } while (!Token.isErrorOrEOF()); + return Token.isError(); +} + +bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_liveins)); + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of liveins. + return false; + do { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg = 0; + if (parseNamedRegister(Reg)) + return true; + lex(); + LaneBitmask Mask = LaneBitmask::getAll(); + if (consumeIfPresent(MIToken::colon)) { + // Parse lane mask. + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) + return error("expected a lane mask"); + static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned), + "Use correct get-function for lane mask"); + LaneBitmask::Type V; + if (getUnsigned(V)) + return error("invalid lane mask value"); + Mask = LaneBitmask(V); + lex(); + } + MBB.addLiveIn(Reg, Mask); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + +bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_successors)); + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of successors. + return false; + do { + if (Token.isNot(MIToken::MachineBasicBlock)) + return error("expected a machine basic block reference"); + MachineBasicBlock *SuccMBB = nullptr; + if (parseMBBReference(SuccMBB)) + return true; + lex(); + unsigned Weight = 0; + if (consumeIfPresent(MIToken::lparen)) { + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) + return error("expected an integer literal after '('"); + if (getUnsigned(Weight)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + } + MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight)); + } while (consumeIfPresent(MIToken::comma)); + MBB.normalizeSuccProbs(); + return false; +} + +bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom) { + // Skip the definition. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + lex(); + if (consumeIfPresent(MIToken::lparen)) { + while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF()) + lex(); + consumeIfPresent(MIToken::rparen); + } + consumeIfPresent(MIToken::colon); + + // Parse the liveins and successors. + // N.B: Multiple lists of successors and liveins are allowed and they're + // merged into one. + // Example: + // liveins: %edi + // liveins: %esi + // + // is equivalent to + // liveins: %edi, %esi + bool ExplicitSuccessors = false; + while (true) { + if (Token.is(MIToken::kw_successors)) { + if (parseBasicBlockSuccessors(MBB)) + return true; + ExplicitSuccessors = true; + } else if (Token.is(MIToken::kw_liveins)) { + if (parseBasicBlockLiveins(MBB)) + return true; + } else if (consumeIfPresent(MIToken::Newline)) { + continue; + } else + break; + if (!Token.isNewlineOrEOF()) + return error("expected line break at the end of a list"); + lex(); + } + + // Parse the instructions. + bool IsInBundle = false; + MachineInstr *PrevMI = nullptr; + while (!Token.is(MIToken::MachineBasicBlockLabel) && + !Token.is(MIToken::Eof)) { + if (consumeIfPresent(MIToken::Newline)) + continue; + if (consumeIfPresent(MIToken::rbrace)) { + // The first parsing pass should verify that all closing '}' have an + // opening '{'. + assert(IsInBundle); + IsInBundle = false; + continue; + } + MachineInstr *MI = nullptr; + if (parse(MI)) + return true; + MBB.insert(MBB.end(), MI); + if (IsInBundle) { + PrevMI->setFlag(MachineInstr::BundledSucc); + MI->setFlag(MachineInstr::BundledPred); + } + PrevMI = MI; + if (Token.is(MIToken::lbrace)) { + if (IsInBundle) + return error("nested instruction bundles are not allowed"); + lex(); + // This instruction is the start of the bundle. + MI->setFlag(MachineInstr::BundledSucc); + IsInBundle = true; + if (!Token.is(MIToken::Newline)) + // The next instruction can be on the same line. + continue; + } + assert(Token.isNewlineOrEOF() && "MI is not fully parsed"); + lex(); + } + + // Construct successor list by searching for basic block machine operands. + if (!ExplicitSuccessors) { + SmallVector<MachineBasicBlock*,4> Successors; + bool IsFallthrough; + guessSuccessors(MBB, Successors, IsFallthrough); + for (MachineBasicBlock *Succ : Successors) + MBB.addSuccessor(Succ); + + if (IsFallthrough) { + AddFalthroughFrom = &MBB; + } else { + MBB.normalizeSuccProbs(); + } + } + + return false; +} + +bool MIParser::parseBasicBlocks() { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + // The first parsing pass should have verified that this token is a MBB label + // in the 'parseBasicBlockDefinitions' method. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + MachineBasicBlock *AddFalthroughFrom = nullptr; + do { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB)) + return true; + if (AddFalthroughFrom) { + if (!AddFalthroughFrom->isSuccessor(MBB)) + AddFalthroughFrom->addSuccessor(MBB); + AddFalthroughFrom->normalizeSuccProbs(); + AddFalthroughFrom = nullptr; + } + if (parseBasicBlock(*MBB, AddFalthroughFrom)) + return true; + // The method 'parseBasicBlock' should parse the whole block until the next + // block or the end of file. + assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)); + } while (Token.isNot(MIToken::Eof)); + return false; +} + +bool MIParser::parse(MachineInstr *&MI) { + // Parse any register operands before '=' + MachineOperand MO = MachineOperand::CreateImm(0); + SmallVector<ParsedMachineOperand, 8> Operands; + while (Token.isRegister() || Token.isRegisterFlag()) { + auto Loc = Token.location(); + Optional<unsigned> TiedDefIdx; + if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true)) + return true; + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNot(MIToken::comma)) + break; + lex(); + } + if (!Operands.empty() && expectAndConsume(MIToken::equal)) + return true; + + unsigned OpCode, Flags = 0; + if (Token.isError() || parseInstruction(OpCode, Flags)) + return true; + + // Parse the remaining machine operands. + while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) && + Token.isNot(MIToken::kw_post_instr_symbol) && + Token.isNot(MIToken::kw_debug_location) && + Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { + auto Loc = Token.location(); + Optional<unsigned> TiedDefIdx; + if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) + return true; + if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) + MO.setIsDebug(); + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + break; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + } + + MCSymbol *PreInstrSymbol = nullptr; + if (Token.is(MIToken::kw_pre_instr_symbol)) + if (parsePreOrPostInstrSymbol(PreInstrSymbol)) + return true; + MCSymbol *PostInstrSymbol = nullptr; + if (Token.is(MIToken::kw_post_instr_symbol)) + if (parsePreOrPostInstrSymbol(PostInstrSymbol)) + return true; + + DebugLoc DebugLocation; + if (Token.is(MIToken::kw_debug_location)) { + lex(); + MDNode *Node = nullptr; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(Node)) + return true; + } else + return error("expected a metadata node after 'debug-location'"); + if (!isa<DILocation>(Node)) + return error("referenced metadata is not a DILocation"); + DebugLocation = DebugLoc(Node); + } + + // Parse the machine memory operands. + SmallVector<MachineMemOperand *, 2> MemOperands; + if (Token.is(MIToken::coloncolon)) { + lex(); + while (!Token.isNewlineOrEOF()) { + MachineMemOperand *MemOp = nullptr; + if (parseMachineMemoryOperand(MemOp)) + return true; + MemOperands.push_back(MemOp); + if (Token.isNewlineOrEOF()) + break; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine memory operand"); + lex(); + } + } + + const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); + if (!MCID.isVariadic()) { + // FIXME: Move the implicit operand verification to the machine verifier. + if (verifyImplicitOperands(Operands, MCID)) + return true; + } + + // TODO: Check for extraneous machine operands. + MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); + MI->setFlags(Flags); + for (const auto &Operand : Operands) + MI->addOperand(MF, Operand.Operand); + if (assignRegisterTies(*MI, Operands)) + return true; + if (PreInstrSymbol) + MI->setPreInstrSymbol(MF, PreInstrSymbol); + if (PostInstrSymbol) + MI->setPostInstrSymbol(MF, PostInstrSymbol); + if (!MemOperands.empty()) + MI->setMemRefs(MF, MemOperands); + return false; +} + +bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) { + lex(); + if (Token.isNot(MIToken::MachineBasicBlock)) + return error("expected a machine basic block reference"); + if (parseMBBReference(MBB)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error( + "expected end of string after the machine basic block reference"); + return false; +} + +bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + if (parseNamedRegister(Reg)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) { + lex(); + if (Token.isNot(MIToken::VirtualRegister)) + return error("expected a virtual register"); + if (parseVirtualRegister(Info)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister) && + Token.isNot(MIToken::VirtualRegister)) + return error("expected either a named or virtual register"); + + VRegInfo *Info; + if (parseRegister(Reg, Info)) + return true; + + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneStackObject(int &FI) { + lex(); + if (Token.isNot(MIToken::StackObject)) + return error("expected a stack object"); + if (parseStackFrameIndex(FI)) + return true; + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the stack object reference"); + return false; +} + +bool MIParser::parseStandaloneMDNode(MDNode *&Node) { + lex(); + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_diexpr)) { + if (parseDIExpression(Node)) + return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(Node)) + return true; + } else + return error("expected a metadata node"); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the metadata node"); + return false; +} + +static const char *printImplicitRegisterFlag(const MachineOperand &MO) { + assert(MO.isImplicit()); + return MO.isDef() ? "implicit-def" : "implicit"; +} + +static std::string getRegisterName(const TargetRegisterInfo *TRI, + unsigned Reg) { + assert(Register::isPhysicalRegister(Reg) && "expected phys reg"); + return StringRef(TRI->getName(Reg)).lower(); +} + +/// Return true if the parsed machine operands contain a given machine operand. +static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand, + ArrayRef<ParsedMachineOperand> Operands) { + for (const auto &I : Operands) { + if (ImplicitOperand.isIdenticalTo(I.Operand)) + return true; + } + return false; +} + +bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, + const MCInstrDesc &MCID) { + if (MCID.isCall()) + // We can't verify call instructions as they can contain arbitrary implicit + // register and register mask operands. + return false; + + // Gather all the expected implicit operands. + SmallVector<MachineOperand, 4> ImplicitOperands; + if (MCID.ImplicitDefs) + for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (MCID.ImplicitUses) + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpUses, false, true)); + + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (const auto &I : ImplicitOperands) { + if (isImplicitOperandIn(I, Operands)) + continue; + return error(Operands.empty() ? Token.location() : Operands.back().End, + Twine("missing implicit register operand '") + + printImplicitRegisterFlag(I) + " $" + + getRegisterName(TRI, I.getReg()) + "'"); + } + return false; +} + +bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { + // Allow frame and fast math flags for OPCODE + while (Token.is(MIToken::kw_frame_setup) || + Token.is(MIToken::kw_frame_destroy) || + Token.is(MIToken::kw_nnan) || + Token.is(MIToken::kw_ninf) || + Token.is(MIToken::kw_nsz) || + Token.is(MIToken::kw_arcp) || + Token.is(MIToken::kw_contract) || + Token.is(MIToken::kw_afn) || + Token.is(MIToken::kw_reassoc) || + Token.is(MIToken::kw_nuw) || + Token.is(MIToken::kw_nsw) || + Token.is(MIToken::kw_exact) || + Token.is(MIToken::kw_fpexcept)) { + // Mine frame and fast math flags + if (Token.is(MIToken::kw_frame_setup)) + Flags |= MachineInstr::FrameSetup; + if (Token.is(MIToken::kw_frame_destroy)) + Flags |= MachineInstr::FrameDestroy; + if (Token.is(MIToken::kw_nnan)) + Flags |= MachineInstr::FmNoNans; + if (Token.is(MIToken::kw_ninf)) + Flags |= MachineInstr::FmNoInfs; + if (Token.is(MIToken::kw_nsz)) + Flags |= MachineInstr::FmNsz; + if (Token.is(MIToken::kw_arcp)) + Flags |= MachineInstr::FmArcp; + if (Token.is(MIToken::kw_contract)) + Flags |= MachineInstr::FmContract; + if (Token.is(MIToken::kw_afn)) + Flags |= MachineInstr::FmAfn; + if (Token.is(MIToken::kw_reassoc)) + Flags |= MachineInstr::FmReassoc; + if (Token.is(MIToken::kw_nuw)) + Flags |= MachineInstr::NoUWrap; + if (Token.is(MIToken::kw_nsw)) + Flags |= MachineInstr::NoSWrap; + if (Token.is(MIToken::kw_exact)) + Flags |= MachineInstr::IsExact; + if (Token.is(MIToken::kw_fpexcept)) + Flags |= MachineInstr::FPExcept; + + lex(); + } + if (Token.isNot(MIToken::Identifier)) + return error("expected a machine instruction"); + StringRef InstrName = Token.stringValue(); + if (PFS.Target.parseInstrName(InstrName, OpCode)) + return error(Twine("unknown machine instruction name '") + InstrName + "'"); + lex(); + return false; +} + +bool MIParser::parseNamedRegister(unsigned &Reg) { + assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token"); + StringRef Name = Token.stringValue(); + if (PFS.Target.getRegisterByName(Name, Reg)) + return error(Twine("unknown register name '") + Name + "'"); + return false; +} + +bool MIParser::parseNamedVirtualRegister(VRegInfo *&Info) { + assert(Token.is(MIToken::NamedVirtualRegister) && "Expected NamedVReg token"); + StringRef Name = Token.stringValue(); + // TODO: Check that the VReg name is not the same as a physical register name. + // If it is, then print a warning (when warnings are implemented). + Info = &PFS.getVRegInfoNamed(Name); + return false; +} + +bool MIParser::parseVirtualRegister(VRegInfo *&Info) { + if (Token.is(MIToken::NamedVirtualRegister)) + return parseNamedVirtualRegister(Info); + assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token"); + unsigned ID; + if (getUnsigned(ID)) + return true; + Info = &PFS.getVRegInfo(ID); + return false; +} + +bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) { + switch (Token.kind()) { + case MIToken::underscore: + Reg = 0; + return false; + case MIToken::NamedRegister: + return parseNamedRegister(Reg); + case MIToken::NamedVirtualRegister: + case MIToken::VirtualRegister: + if (parseVirtualRegister(Info)) + return true; + Reg = Info->VReg; + return false; + // TODO: Parse other register kinds. + default: + llvm_unreachable("The current token should be a register"); + } +} + +bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) { + if (Token.isNot(MIToken::Identifier) && Token.isNot(MIToken::underscore)) + return error("expected '_', register class, or register bank name"); + StringRef::iterator Loc = Token.location(); + StringRef Name = Token.stringValue(); + + // Was it a register class? + const TargetRegisterClass *RC = PFS.Target.getRegClass(Name); + if (RC) { + lex(); + + switch (RegInfo.Kind) { + case VRegInfo::UNKNOWN: + case VRegInfo::NORMAL: + RegInfo.Kind = VRegInfo::NORMAL; + if (RegInfo.Explicit && RegInfo.D.RC != RC) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + return error(Loc, Twine("conflicting register classes, previously: ") + + Twine(TRI.getRegClassName(RegInfo.D.RC))); + } + RegInfo.D.RC = RC; + RegInfo.Explicit = true; + return false; + + case VRegInfo::GENERIC: + case VRegInfo::REGBANK: + return error(Loc, "register class specification on generic register"); + } + llvm_unreachable("Unexpected register kind"); + } + + // Should be a register bank or a generic register. + const RegisterBank *RegBank = nullptr; + if (Name != "_") { + RegBank = PFS.Target.getRegBank(Name); + if (!RegBank) + return error(Loc, "expected '_', register class, or register bank name"); + } + + lex(); + + switch (RegInfo.Kind) { + case VRegInfo::UNKNOWN: + case VRegInfo::GENERIC: + case VRegInfo::REGBANK: + RegInfo.Kind = RegBank ? VRegInfo::REGBANK : VRegInfo::GENERIC; + if (RegInfo.Explicit && RegInfo.D.RegBank != RegBank) + return error(Loc, "conflicting generic register banks"); + RegInfo.D.RegBank = RegBank; + RegInfo.Explicit = true; + return false; + + case VRegInfo::NORMAL: + return error(Loc, "register bank specification on normal register"); + } + llvm_unreachable("Unexpected register kind"); +} + +bool MIParser::parseRegisterFlag(unsigned &Flags) { + const unsigned OldFlags = Flags; + switch (Token.kind()) { + case MIToken::kw_implicit: + Flags |= RegState::Implicit; + break; + case MIToken::kw_implicit_define: + Flags |= RegState::ImplicitDefine; + break; + case MIToken::kw_def: + Flags |= RegState::Define; + break; + case MIToken::kw_dead: + Flags |= RegState::Dead; + break; + case MIToken::kw_killed: + Flags |= RegState::Kill; + break; + case MIToken::kw_undef: + Flags |= RegState::Undef; + break; + case MIToken::kw_internal: + Flags |= RegState::InternalRead; + break; + case MIToken::kw_early_clobber: + Flags |= RegState::EarlyClobber; + break; + case MIToken::kw_debug_use: + Flags |= RegState::Debug; + break; + case MIToken::kw_renamable: + Flags |= RegState::Renamable; + break; + default: + llvm_unreachable("The current token should be a register flag"); + } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. + return error("duplicate '" + Token.stringValue() + "' register flag"); + lex(); + return false; +} + +bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { + assert(Token.is(MIToken::dot)); + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected a subregister index after '.'"); + auto Name = Token.stringValue(); + SubReg = PFS.Target.getSubRegIndex(Name); + if (!SubReg) + return error(Twine("use of unknown subregister index '") + Name + "'"); + lex(); + return false; +} + +bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { + if (!consumeIfPresent(MIToken::kw_tied_def)) + return true; + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after 'tied-def'"); + if (getUnsigned(TiedDefIdx)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + return false; +} + +bool MIParser::assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; + for (unsigned I = 0, E = Operands.size(); I != E; ++I) { + if (!Operands[I].TiedDefIdx) + continue; + // The parser ensures that this operand is a register use, so we just have + // to check the tied-def operand. + unsigned DefIdx = Operands[I].TiedDefIdx.getValue(); + if (DefIdx >= E) + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '" + + Twine(DefIdx) + "'; instruction has only ") + + Twine(E) + " operands"); + const auto &DefOperand = Operands[DefIdx].Operand; + if (!DefOperand.isReg() || !DefOperand.isDef()) + // FIXME: add note with the def operand. + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '") + + Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) + + " isn't a defined register"); + // Check that the tied-def operand wasn't tied elsewhere. + for (const auto &TiedPair : TiedRegisterPairs) { + if (TiedPair.first == DefIdx) + return error(Operands[I].Begin, + Twine("the tied-def operand #") + Twine(DefIdx) + + " is already tied with another register operand"); + } + TiedRegisterPairs.push_back(std::make_pair(DefIdx, I)); + } + // FIXME: Verify that for non INLINEASM instructions, the def and use tied + // indices must be less than tied max. + for (const auto &TiedPair : TiedRegisterPairs) + MI.tieOperands(TiedPair.first, TiedPair.second); + return false; +} + +bool MIParser::parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, + bool IsDef) { + unsigned Flags = IsDef ? RegState::Define : 0; + while (Token.isRegisterFlag()) { + if (parseRegisterFlag(Flags)) + return true; + } + if (!Token.isRegister()) + return error("expected a register after register flags"); + unsigned Reg; + VRegInfo *RegInfo; + if (parseRegister(Reg, RegInfo)) + return true; + lex(); + unsigned SubReg = 0; + if (Token.is(MIToken::dot)) { + if (parseSubRegisterIndex(SubReg)) + return true; + if (!Register::isVirtualRegister(Reg)) + return error("subregister index expects a virtual register"); + } + if (Token.is(MIToken::colon)) { + if (!Register::isVirtualRegister(Reg)) + return error("register class specification expects a virtual register"); + lex(); + if (parseRegisterClassOrBank(*RegInfo)) + return true; + } + MachineRegisterInfo &MRI = MF.getRegInfo(); + if ((Flags & RegState::Define) == 0) { + if (consumeIfPresent(MIToken::lparen)) { + unsigned Idx; + if (!parseRegisterTiedDefIndex(Idx)) + TiedDefIdx = Idx; + else { + // Try a redundant low-level type. + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) + return error("expected tied-def or low-level type after '('"); + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr)); + MRI.setType(Reg, Ty); + } + } + } else if (consumeIfPresent(MIToken::lparen)) { + // Virtual registers may have a tpe with GlobalISel. + if (!Register::isVirtualRegister(Reg)) + return error("unexpected type on physical register"); + + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) + return true; + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr)); + MRI.setType(Reg, Ty); + } else if (Register::isVirtualRegister(Reg)) { + // Generic virtual registers must have a type. + // If we end up here this means the type hasn't been specified and + // this is bad! + if (RegInfo->Kind == VRegInfo::GENERIC || + RegInfo->Kind == VRegInfo::REGBANK) + return error("generic virtual registers must have a type"); + } + Dest = MachineOperand::CreateReg( + Reg, Flags & RegState::Define, Flags & RegState::Implicit, + Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, + Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug, + Flags & RegState::InternalRead, Flags & RegState::Renamable); + + return false; +} + +bool MIParser::parseImmediateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::IntegerLiteral)); + const APSInt &Int = Token.integerValue(); + if (Int.getMinSignedBits() > 64) + return error("integer literal is too large to be an immediate operand"); + Dest = MachineOperand::CreateImm(Int.getExtValue()); + lex(); + return false; +} + +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + auto Source = StringValue.str(); // The source has to be null terminated. + SMDiagnostic Err; + C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), + &PFS.IRSlots); + if (!C) + return error(Loc + Err.getColumnNo(), Err.getMessage()); + return false; +} + +bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { + if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) + return true; + lex(); + return false; +} + +// See LLT implemntation for bit size limits. +static bool verifyScalarSize(uint64_t Size) { + return Size != 0 && isUInt<16>(Size); +} + +static bool verifyVectorElementCount(uint64_t NumElts) { + return NumElts != 0 && isUInt<16>(NumElts); +} + +static bool verifyAddrSpace(uint64_t AddrSpace) { + return isUInt<24>(AddrSpace); +} + +bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { + if (Token.range().front() == 's' || Token.range().front() == 'p') { + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 's'/'p' type character"); + } + + if (Token.range().front() == 's') { + auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + + Ty = LLT::scalar(ScalarSize); + lex(); + return false; + } else if (Token.range().front() == 'p') { + const DataLayout &DL = MF.getDataLayout(); + uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyAddrSpace(AS)) + return error("invalid address space number"); + + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + lex(); + return false; + } + + // Now we're looking for a vector. + if (Token.isNot(MIToken::less)) + return error(Loc, + "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type"); + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + uint64_t NumElements = Token.integerValue().getZExtValue(); + if (!verifyVectorElementCount(NumElements)) + return error("invalid number of vector elements"); + + lex(); + + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + lex(); + + if (Token.range().front() != 's' && Token.range().front() != 'p') + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 's'/'p' type character"); + + if (Token.range().front() == 's') { + auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyScalarSize(ScalarSize)) + return error("invalid size for scalar type"); + Ty = LLT::scalar(ScalarSize); + } else if (Token.range().front() == 'p') { + const DataLayout &DL = MF.getDataLayout(); + uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue(); + if (!verifyAddrSpace(AS)) + return error("invalid address space number"); + + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + } else + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + lex(); + + if (Token.isNot(MIToken::greater)) + return error(Loc, "expected <M x sN> or <M x pA> for vector type"); + lex(); + + Ty = LLT::vector(NumElements, Ty); + return false; +} + +bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::Identifier)); + StringRef TypeStr = Token.range(); + if (TypeStr.front() != 'i' && TypeStr.front() != 's' && + TypeStr.front() != 'p') + return error( + "a typed immediate operand should start with one of 'i', 's', or 'p'"); + StringRef SizeStr = Token.range().drop_front(); + if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) + return error("expected integers after 'i'/'s'/'p' type character"); + + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) { + if (Token.isNot(MIToken::Identifier) || + !(Token.range() == "true" || Token.range() == "false")) + return error("expected an integer literal"); + } + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C)); + return false; +} + +bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::FloatingPointLiteral) && + Token.isNot(MIToken::HexLiteral)) + return error("expected a floating point literal"); + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C)); + return false; +} + +bool MIParser::getUnsigned(unsigned &Result) { + if (Token.hasIntegerValue()) { + const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; + uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); + if (Val64 == Limit) + return error("expected 32-bit integer (too large)"); + Result = Val64; + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 32) + return error("expected 32-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; +} + +bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { + assert(Token.is(MIToken::MachineBasicBlock) || + Token.is(MIToken::MachineBasicBlockLabel)); + unsigned Number; + if (getUnsigned(Number)) + return true; + auto MBBInfo = PFS.MBBSlots.find(Number); + if (MBBInfo == PFS.MBBSlots.end()) + return error(Twine("use of undefined machine basic block #") + + Twine(Number)); + MBB = MBBInfo->second; + // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once + // we drop the <irname> from the bb.<id>.<irname> format. + if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) + return error(Twine("the name of machine basic block #") + Twine(Number) + + " isn't '" + Token.stringValue() + "'"); + return false; +} + +bool MIParser::parseMBBOperand(MachineOperand &Dest) { + MachineBasicBlock *MBB; + if (parseMBBReference(MBB)) + return true; + Dest = MachineOperand::CreateMBB(MBB); + lex(); + return false; +} + +bool MIParser::parseStackFrameIndex(int &FI) { + assert(Token.is(MIToken::StackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.StackObjectSlots.find(ID); + if (ObjectInfo == PFS.StackObjectSlots.end()) + return error(Twine("use of undefined stack object '%stack.") + Twine(ID) + + "'"); + StringRef Name; + if (const auto *Alloca = + MF.getFrameInfo().getObjectAllocation(ObjectInfo->second)) + Name = Alloca->getName(); + if (!Token.stringValue().empty() && Token.stringValue() != Name) + return error(Twine("the name of the stack object '%stack.") + Twine(ID) + + "' isn't '" + Token.stringValue() + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool MIParser::parseStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseFixedStackFrameIndex(int &FI) { + assert(Token.is(MIToken::FixedStackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID); + if (ObjectInfo == PFS.FixedStackObjectSlots.end()) + return error(Twine("use of undefined fixed stack object '%fixed-stack.") + + Twine(ID) + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseGlobalValue(GlobalValue *&GV) { + switch (Token.kind()) { + case MIToken::NamedGlobalValue: { + const Module *M = MF.getFunction().getParent(); + GV = M->getNamedValue(Token.stringValue()); + if (!GV) + return error(Twine("use of undefined global value '") + Token.range() + + "'"); + break; + } + case MIToken::GlobalValue: { + unsigned GVIdx; + if (getUnsigned(GVIdx)) + return true; + if (GVIdx >= PFS.IRSlots.GlobalValues.size()) + return error(Twine("use of undefined global value '@") + Twine(GVIdx) + + "'"); + GV = PFS.IRSlots.GlobalValues[GVIdx]; + break; + } + default: + llvm_unreachable("The current token should be a global value"); + } + return false; +} + +bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + lex(); + Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ConstantPoolItem)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ConstantInfo = PFS.ConstantPoolSlots.find(ID); + if (ConstantInfo == PFS.ConstantPoolSlots.end()) + return error("use of undefined constant '%const." + Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::JumpTableIndex)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID); + if (JumpTableEntryInfo == PFS.JumpTableSlots.end()) + return error("use of undefined jump table '%jump-table." + Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second); + return false; +} + +bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ExternalSymbol)); + const char *Symbol = MF.createExternalSymbolName(Token.stringValue()); + lex(); + Dest = MachineOperand::CreateES(Symbol); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::MCSymbol)); + MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue()); + lex(); + Dest = MachineOperand::CreateMCSymbol(Symbol); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::SubRegisterIndex)); + StringRef Name = Token.stringValue(); + unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue()); + if (SubRegIndex == 0) + return error(Twine("unknown subregister index '") + Name + "'"); + lex(); + Dest = MachineOperand::CreateImm(SubRegIndex); + return false; +} + +bool MIParser::parseMDNode(MDNode *&Node) { + assert(Token.is(MIToken::exclaim)); + + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); + if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + lex(); + Node = NodeInfo->second.get(); + return false; +} + +bool MIParser::parseDIExpression(MDNode *&Expr) { + assert(Token.is(MIToken::md_diexpr)); + lex(); + + // FIXME: Share this parsing with the IL parser. + SmallVector<uint64_t, 8> Elements; + + if (expectAndConsume(MIToken::lparen)) + return true; + + if (Token.isNot(MIToken::rparen)) { + do { + if (Token.is(MIToken::Identifier)) { + if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) { + lex(); + Elements.push_back(Op); + continue; + } + if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) { + lex(); + Elements.push_back(Enc); + continue; + } + return error(Twine("invalid DWARF op '") + Token.stringValue() + "'"); + } + + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + + auto &U = Token.integerValue(); + if (U.ugt(UINT64_MAX)) + return error("element too large, limit is " + Twine(UINT64_MAX)); + Elements.push_back(U.getZExtValue()); + lex(); + + } while (consumeIfPresent(MIToken::comma)); + } + + if (expectAndConsume(MIToken::rparen)) + return true; + + Expr = DIExpression::get(MF.getFunction().getContext(), Elements); + return false; +} + +bool MIParser::parseDILocation(MDNode *&Loc) { + assert(Token.is(MIToken::md_dilocation)); + lex(); + + bool HaveLine = false; + unsigned Line = 0; + unsigned Column = 0; + MDNode *Scope = nullptr; + MDNode *InlinedAt = nullptr; + bool ImplicitCode = false; + + if (expectAndConsume(MIToken::lparen)) + return true; + + if (Token.isNot(MIToken::rparen)) { + do { + if (Token.is(MIToken::Identifier)) { + if (Token.stringValue() == "line") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + Line = Token.integerValue().getZExtValue(); + HaveLine = true; + lex(); + continue; + } + if (Token.stringValue() == "column") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNot(MIToken::IntegerLiteral) || + Token.integerValue().isSigned()) + return error("expected unsigned integer"); + Column = Token.integerValue().getZExtValue(); + lex(); + continue; + } + if (Token.stringValue() == "scope") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (parseMDNode(Scope)) + return error("expected metadata node"); + if (!isa<DIScope>(Scope)) + return error("expected DIScope node"); + continue; + } + if (Token.stringValue() == "inlinedAt") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(InlinedAt)) + return true; + } else if (Token.is(MIToken::md_dilocation)) { + if (parseDILocation(InlinedAt)) + return true; + } else + return error("expected metadata node"); + if (!isa<DILocation>(InlinedAt)) + return error("expected DILocation node"); + continue; + } + if (Token.stringValue() == "isImplicitCode") { + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (!Token.is(MIToken::Identifier)) + return error("expected true/false"); + // As far as I can see, we don't have any existing need for parsing + // true/false in MIR yet. Do it ad-hoc until there's something else + // that needs it. + if (Token.stringValue() == "true") + ImplicitCode = true; + else if (Token.stringValue() == "false") + ImplicitCode = false; + else + return error("expected true/false"); + lex(); + continue; + } + } + return error(Twine("invalid DILocation argument '") + + Token.stringValue() + "'"); + } while (consumeIfPresent(MIToken::comma)); + } + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (!HaveLine) + return error("DILocation requires line number"); + if (!Scope) + return error("DILocation requires a scope"); + + Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope, + InlinedAt, ImplicitCode); + return false; +} + +bool MIParser::parseMetadataOperand(MachineOperand &Dest) { + MDNode *Node = nullptr; + if (Token.is(MIToken::exclaim)) { + if (parseMDNode(Node)) + return true; + } else if (Token.is(MIToken::md_diexpr)) { + if (parseDIExpression(Node)) + return true; + } + Dest = MachineOperand::CreateMetadata(Node); + return false; +} + +bool MIParser::parseCFIOffset(int &Offset) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected a cfi offset"); + if (Token.integerValue().getMinSignedBits() > 32) + return error("expected a 32 bit integer (the cfi offset is too large)"); + Offset = (int)Token.integerValue().getExtValue(); + lex(); + return false; +} + +bool MIParser::parseCFIRegister(unsigned &Reg) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a cfi register"); + unsigned LLVMReg; + if (parseNamedRegister(LLVMReg)) + return true; + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true); + if (DwarfReg < 0) + return error("invalid DWARF register"); + Reg = (unsigned)DwarfReg; + lex(); + return false; +} + +bool MIParser::parseCFIEscapeValues(std::string &Values) { + do { + if (Token.isNot(MIToken::HexLiteral)) + return error("expected a hexadecimal literal"); + unsigned Value; + if (getUnsigned(Value)) + return true; + if (Value > UINT8_MAX) + return error("expected a 8-bit integer (too large)"); + Values.push_back(static_cast<uint8_t>(Value)); + lex(); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + +bool MIParser::parseCFIOperand(MachineOperand &Dest) { + auto Kind = Token.kind(); + lex(); + int Offset; + unsigned Reg; + unsigned CFIIndex; + switch (Kind) { + case MIToken::kw_cfi_same_value: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); + break; + case MIToken::kw_cfi_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + break; + case MIToken::kw_cfi_rel_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createRelOffset(nullptr, Reg, Offset)); + break; + case MIToken::kw_cfi_def_cfa_register: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + break; + case MIToken::kw_cfi_def_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfaOffset negates the offset. + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); + break; + case MIToken::kw_cfi_adjust_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset)); + break; + case MIToken::kw_cfi_def_cfa: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfa negates the offset. + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + break; + case MIToken::kw_cfi_remember_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + break; + case MIToken::kw_cfi_restore: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg)); + break; + case MIToken::kw_cfi_restore_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + break; + case MIToken::kw_cfi_undefined: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg)); + break; + case MIToken::kw_cfi_register: { + unsigned Reg2; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(Reg2)) + return true; + + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2)); + break; + } + case MIToken::kw_cfi_window_save: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); + break; + case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + break; + case MIToken::kw_cfi_escape: { + std::string Values; + if (parseCFIEscapeValues(Values)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values)); + break; + } + default: + // TODO: Parse the other CFI operands. + llvm_unreachable("The current token should be a cfi operand"); + } + Dest = MachineOperand::CreateCFIIndex(CFIIndex); + return false; +} + +bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) { + switch (Token.kind()) { + case MIToken::NamedIRBlock: { + BB = dyn_cast_or_null<BasicBlock>( + F.getValueSymbolTable()->lookup(Token.stringValue())); + if (!BB) + return error(Twine("use of undefined IR block '") + Token.range() + "'"); + break; + } + case MIToken::IRBlock: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F)); + if (!BB) + return error(Twine("use of undefined IR block '%ir-block.") + + Twine(SlotNumber) + "'"); + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + return false; +} + +bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_blockaddress)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue)) + return error("expected a global value"); + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + auto *F = dyn_cast<Function>(GV); + if (!F) + return error("expected an IR function reference"); + lex(); + if (expectAndConsume(MIToken::comma)) + return true; + BasicBlock *BB = nullptr; + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)) + return error("expected an IR block reference"); + if (parseIRBlock(BB, *F)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intrinsic)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + if (Token.isNot(MIToken::NamedGlobalValue)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + std::string Name = Token.stringValue(); + lex(); + + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' to terminate intrinsic name"); + + // Find out what intrinsic we're dealing with, first try the global namespace + // and then the target's private intrinsics if that fails. + const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); + Intrinsic::ID ID = Function::lookupIntrinsicID(Name); + if (ID == Intrinsic::not_intrinsic && TII) + ID = static_cast<Intrinsic::ID>(TII->lookupName(Name)); + + if (ID == Intrinsic::not_intrinsic) + return error("unknown intrinsic name"); + Dest = MachineOperand::CreateIntrinsicID(ID); + + return false; +} + +bool MIParser::parsePredicateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred)); + bool IsFloat = Token.is(MIToken::kw_floatpred); + lex(); + + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intpred(whatever) or floatpred(whatever"); + + if (Token.isNot(MIToken::Identifier)) + return error("whatever"); + + CmpInst::Predicate Pred; + if (IsFloat) { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("false", CmpInst::FCMP_FALSE) + .Case("oeq", CmpInst::FCMP_OEQ) + .Case("ogt", CmpInst::FCMP_OGT) + .Case("oge", CmpInst::FCMP_OGE) + .Case("olt", CmpInst::FCMP_OLT) + .Case("ole", CmpInst::FCMP_OLE) + .Case("one", CmpInst::FCMP_ONE) + .Case("ord", CmpInst::FCMP_ORD) + .Case("uno", CmpInst::FCMP_UNO) + .Case("ueq", CmpInst::FCMP_UEQ) + .Case("ugt", CmpInst::FCMP_UGT) + .Case("uge", CmpInst::FCMP_UGE) + .Case("ult", CmpInst::FCMP_ULT) + .Case("ule", CmpInst::FCMP_ULE) + .Case("une", CmpInst::FCMP_UNE) + .Case("true", CmpInst::FCMP_TRUE) + .Default(CmpInst::BAD_FCMP_PREDICATE); + if (!CmpInst::isFPPredicate(Pred)) + return error("invalid floating-point predicate"); + } else { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("eq", CmpInst::ICMP_EQ) + .Case("ne", CmpInst::ICMP_NE) + .Case("sgt", CmpInst::ICMP_SGT) + .Case("sge", CmpInst::ICMP_SGE) + .Case("slt", CmpInst::ICMP_SLT) + .Case("sle", CmpInst::ICMP_SLE) + .Case("ugt", CmpInst::ICMP_UGT) + .Case("uge", CmpInst::ICMP_UGE) + .Case("ult", CmpInst::ICMP_ULT) + .Case("ule", CmpInst::ICMP_ULE) + .Default(CmpInst::BAD_ICMP_PREDICATE); + if (!CmpInst::isIntPredicate(Pred)) + return error("invalid integer predicate"); + } + + lex(); + Dest = MachineOperand::CreatePredicate(Pred); + if (expectAndConsume(MIToken::rparen)) + return error("predicate should be terminated by ')'."); + + return false; +} + +bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_shufflemask)); + + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax shufflemask(<integer or undef>, ...)"); + + SmallVector<Constant *, 32> ShufMask; + LLVMContext &Ctx = MF.getFunction().getContext(); + Type *I32Ty = Type::getInt32Ty(Ctx); + + bool AllZero = true; + bool AllUndef = true; + + do { + if (Token.is(MIToken::kw_undef)) { + ShufMask.push_back(UndefValue::get(I32Ty)); + AllZero = false; + } else if (Token.is(MIToken::IntegerLiteral)) { + AllUndef = false; + const APSInt &Int = Token.integerValue(); + if (!Int.isNullValue()) + AllZero = false; + ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + } else + return error("expected integer constant"); + + lex(); + } while (consumeIfPresent(MIToken::comma)); + + if (expectAndConsume(MIToken::rparen)) + return error("shufflemask should be terminated by ')'."); + + if (AllZero || AllUndef) { + VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); + Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT); + Dest = MachineOperand::CreateShuffleMask(C); + } else + Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); + + return false; +} + +bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_target_index)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target index"); + int Index = 0; + if (PFS.Target.getTargetIndex(Token.stringValue(), Index)) + return error("use of undefined target index '" + Token.stringValue() + "'"); + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) { + assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask"); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + + uint32_t *Mask = MF.allocateRegMask(); + while (true) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg; + if (parseNamedRegister(Reg)) + return true; + lex(); + Mask[Reg / 32] |= 1U << (Reg % 32); + // TODO: Report an error if the same register is used more than once. + if (Token.isNot(MIToken::comma)) + break; + lex(); + } + + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateRegMask(Mask); + return false; +} + +bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_liveout)); + uint32_t *Mask = MF.allocateRegMask(); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + while (true) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg; + if (parseNamedRegister(Reg)) + return true; + lex(); + Mask[Reg / 32] |= 1U << (Reg % 32); + // TODO: Report an error if the same register is used more than once. + if (Token.isNot(MIToken::comma)) + break; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateRegLiveOut(Mask); + return false; +} + +bool MIParser::parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx) { + switch (Token.kind()) { + case MIToken::kw_implicit: + case MIToken::kw_implicit_define: + case MIToken::kw_def: + case MIToken::kw_dead: + case MIToken::kw_killed: + case MIToken::kw_undef: + case MIToken::kw_internal: + case MIToken::kw_early_clobber: + case MIToken::kw_debug_use: + case MIToken::kw_renamable: + case MIToken::underscore: + case MIToken::NamedRegister: + case MIToken::VirtualRegister: + case MIToken::NamedVirtualRegister: + return parseRegisterOperand(Dest, TiedDefIdx); + case MIToken::IntegerLiteral: + return parseImmediateOperand(Dest); + case MIToken::kw_half: + case MIToken::kw_float: + case MIToken::kw_double: + case MIToken::kw_x86_fp80: + case MIToken::kw_fp128: + case MIToken::kw_ppc_fp128: + return parseFPImmediateOperand(Dest); + case MIToken::MachineBasicBlock: + return parseMBBOperand(Dest); + case MIToken::StackObject: + return parseStackObjectOperand(Dest); + case MIToken::FixedStackObject: + return parseFixedStackObjectOperand(Dest); + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: + return parseGlobalAddressOperand(Dest); + case MIToken::ConstantPoolItem: + return parseConstantPoolIndexOperand(Dest); + case MIToken::JumpTableIndex: + return parseJumpTableIndexOperand(Dest); + case MIToken::ExternalSymbol: + return parseExternalSymbolOperand(Dest); + case MIToken::MCSymbol: + return parseMCSymbolOperand(Dest); + case MIToken::SubRegisterIndex: + return parseSubRegisterIndexOperand(Dest); + case MIToken::md_diexpr: + case MIToken::exclaim: + return parseMetadataOperand(Dest); + case MIToken::kw_cfi_same_value: + case MIToken::kw_cfi_offset: + case MIToken::kw_cfi_rel_offset: + case MIToken::kw_cfi_def_cfa_register: + case MIToken::kw_cfi_def_cfa_offset: + case MIToken::kw_cfi_adjust_cfa_offset: + case MIToken::kw_cfi_escape: + case MIToken::kw_cfi_def_cfa: + case MIToken::kw_cfi_register: + case MIToken::kw_cfi_remember_state: + case MIToken::kw_cfi_restore: + case MIToken::kw_cfi_restore_state: + case MIToken::kw_cfi_undefined: + case MIToken::kw_cfi_window_save: + case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + return parseCFIOperand(Dest); + case MIToken::kw_blockaddress: + return parseBlockAddressOperand(Dest); + case MIToken::kw_intrinsic: + return parseIntrinsicOperand(Dest); + case MIToken::kw_target_index: + return parseTargetIndexOperand(Dest); + case MIToken::kw_liveout: + return parseLiveoutRegisterMaskOperand(Dest); + case MIToken::kw_floatpred: + case MIToken::kw_intpred: + return parsePredicateOperand(Dest); + case MIToken::kw_shufflemask: + return parseShuffleMaskOperand(Dest); + case MIToken::Error: + return true; + case MIToken::Identifier: + if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) { + Dest = MachineOperand::CreateRegMask(RegMask); + lex(); + break; + } else if (Token.stringValue() == "CustomRegMask") { + return parseCustomRegisterMaskOperand(Dest); + } else + return parseTypedImmediateOperand(Dest); + default: + // FIXME: Parse the MCSymbol machine operand. + return error("expected a machine operand"); + } + return false; +} + +bool MIParser::parseMachineOperandAndTargetFlags( + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { + unsigned TF = 0; + bool HasTargetFlags = false; + if (Token.is(MIToken::kw_target_flags)) { + HasTargetFlags = true; + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) { + if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + } + lex(); + while (Token.is(MIToken::comma)) { + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + unsigned BitFlag = 0; + if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + // TODO: Report an error when using a duplicate bit target flag. + TF |= BitFlag; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + } + auto Loc = Token.location(); + if (parseMachineOperand(Dest, TiedDefIdx)) + return true; + if (!HasTargetFlags) + return false; + if (Dest.isReg()) + return error(Loc, "register operands can't have target flags"); + Dest.setTargetFlags(TF); + return false; +} + +bool MIParser::parseOffset(int64_t &Offset) { + if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus)) + return false; + StringRef Sign = Token.range(); + bool IsNegative = Token.is(MIToken::minus); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after '" + Sign + "'"); + if (Token.integerValue().getMinSignedBits() > 64) + return error("expected 64-bit integer (too large)"); + Offset = Token.integerValue().getExtValue(); + if (IsNegative) + Offset = -Offset; + lex(); + return false; +} + +bool MIParser::parseAlignment(unsigned &Alignment) { + assert(Token.is(MIToken::kw_align)); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected an integer literal after 'align'"); + if (getUnsigned(Alignment)) + return true; + lex(); + + if (!isPowerOf2_32(Alignment)) + return error("expected a power-of-2 literal after 'align'"); + + return false; +} + +bool MIParser::parseAddrspace(unsigned &Addrspace) { + assert(Token.is(MIToken::kw_addrspace)); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected an integer literal after 'addrspace'"); + if (getUnsigned(Addrspace)) + return true; + lex(); + return false; +} + +bool MIParser::parseOperandsOffset(MachineOperand &Op) { + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Op.setOffset(Offset); + return false; +} + +bool MIParser::parseIRValue(const Value *&V) { + switch (Token.kind()) { + case MIToken::NamedIRValue: { + V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); + break; + } + case MIToken::IRValue: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + V = getIRValue(SlotNumber); + break; + } + case MIToken::NamedGlobalValue: + case MIToken::GlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + V = GV; + break; + } + case MIToken::QuotedIRValue: { + const Constant *C = nullptr; + if (parseIRConstant(Token.location(), Token.stringValue(), C)) + return true; + V = C; + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + if (!V) + return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return false; +} + +bool MIParser::getUint64(uint64_t &Result) { + if (Token.hasIntegerValue()) { + if (Token.integerValue().getActiveBits() > 64) + return error("expected 64-bit integer (too large)"); + Result = Token.integerValue().getZExtValue(); + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 64) + return error("expected 64-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; +} + +bool MIParser::getHexUint(APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + + // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make + // sure it isn't the case before constructing result. + unsigned NumBits = (A == 0) ? 32 : A.getActiveBits(); + Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); + return false; +} + +bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { + const auto OldFlags = Flags; + switch (Token.kind()) { + case MIToken::kw_volatile: + Flags |= MachineMemOperand::MOVolatile; + break; + case MIToken::kw_non_temporal: + Flags |= MachineMemOperand::MONonTemporal; + break; + case MIToken::kw_dereferenceable: + Flags |= MachineMemOperand::MODereferenceable; + break; + case MIToken::kw_invariant: + Flags |= MachineMemOperand::MOInvariant; + break; + case MIToken::StringConstant: { + MachineMemOperand::Flags TF; + if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target MMO flag '" + Token.stringValue() + + "'"); + Flags |= TF; + break; + } + default: + llvm_unreachable("The current token should be a memory operand flag"); + } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. + return error("duplicate '" + Token.stringValue() + "' memory operand flag"); + lex(); + return false; +} + +bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { + switch (Token.kind()) { + case MIToken::kw_stack: + PSV = MF.getPSVManager().getStack(); + break; + case MIToken::kw_got: + PSV = MF.getPSVManager().getGOT(); + break; + case MIToken::kw_jump_table: + PSV = MF.getPSVManager().getJumpTable(); + break; + case MIToken::kw_constant_pool: + PSV = MF.getPSVManager().getConstantPool(); + break; + case MIToken::FixedStackObject: { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::StackObject: { + int FI; + if (parseStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::kw_call_entry: + lex(); + switch (Token.kind()) { + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + PSV = MF.getPSVManager().getGlobalValueCallEntry(GV); + break; + } + case MIToken::ExternalSymbol: + PSV = MF.getPSVManager().getExternalSymbolCallEntry( + MF.createExternalSymbolName(Token.stringValue())); + break; + default: + return error( + "expected a global value or an external symbol after 'call-entry'"); + } + break; + default: + llvm_unreachable("The current token should be pseudo source value"); + } + lex(); + return false; +} + +bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { + if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || + Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) || + Token.is(MIToken::kw_call_entry)) { + const PseudoSourceValue *PSV = nullptr; + if (parseMemoryPseudoSourceValue(PSV)) + return true; + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(PSV, Offset); + return false; + } + if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && + Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue) && + Token.isNot(MIToken::QuotedIRValue)) + return error("expected an IR value reference"); + const Value *V = nullptr; + if (parseIRValue(V)) + return true; + if (!V->getType()->isPointerTy()) + return error("expected a pointer IR value"); + lex(); + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(V, Offset); + return false; +} + +bool MIParser::parseOptionalScope(LLVMContext &Context, + SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") { + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected '(' in syncscope"); + + std::string SSN; + if (parseStringConstant(SSN)) + return true; + + SSID = Context.getOrInsertSyncScopeID(SSN); + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' in syncscope"); + } + + return false; +} + +bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { + Order = AtomicOrdering::NotAtomic; + if (Token.isNot(MIToken::Identifier)) + return false; + + Order = StringSwitch<AtomicOrdering>(Token.stringValue()) + .Case("unordered", AtomicOrdering::Unordered) + .Case("monotonic", AtomicOrdering::Monotonic) + .Case("acquire", AtomicOrdering::Acquire) + .Case("release", AtomicOrdering::Release) + .Case("acq_rel", AtomicOrdering::AcquireRelease) + .Case("seq_cst", AtomicOrdering::SequentiallyConsistent) + .Default(AtomicOrdering::NotAtomic); + + if (Order != AtomicOrdering::NotAtomic) { + lex(); + return false; + } + + return error("expected an atomic scope, ordering or a size specification"); +} + +bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { + if (expectAndConsume(MIToken::lparen)) + return true; + MachineMemOperand::Flags Flags = MachineMemOperand::MONone; + while (Token.isMemoryOperandFlag()) { + if (parseMemoryOperandFlag(Flags)) + return true; + } + if (Token.isNot(MIToken::Identifier) || + (Token.stringValue() != "load" && Token.stringValue() != "store")) + return error("expected 'load' or 'store' memory operation"); + if (Token.stringValue() == "load") + Flags |= MachineMemOperand::MOLoad; + else + Flags |= MachineMemOperand::MOStore; + lex(); + + // Optional 'store' for operands that both load and store. + if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") { + Flags |= MachineMemOperand::MOStore; + lex(); + } + + // Optional synchronization scope. + SyncScope::ID SSID; + if (parseOptionalScope(MF.getFunction().getContext(), SSID)) + return true; + + // Up to two atomic orderings (cmpxchg provides guarantees on failure). + AtomicOrdering Order, FailureOrder; + if (parseOptionalAtomicOrdering(Order)) + return true; + + if (parseOptionalAtomicOrdering(FailureOrder)) + return true; + + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::kw_unknown_size)) + return error("expected the size integer literal or 'unknown-size' after " + "memory operation"); + uint64_t Size; + if (Token.is(MIToken::IntegerLiteral)) { + if (getUint64(Size)) + return true; + } else if (Token.is(MIToken::kw_unknown_size)) { + Size = MemoryLocation::UnknownSize; + } + lex(); + + MachinePointerInfo Ptr = MachinePointerInfo(); + if (Token.is(MIToken::Identifier)) { + const char *Word = + ((Flags & MachineMemOperand::MOLoad) && + (Flags & MachineMemOperand::MOStore)) + ? "on" + : Flags & MachineMemOperand::MOLoad ? "from" : "into"; + if (Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + if (parseMachinePointerInfo(Ptr)) + return true; + } + unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1); + AAMDNodes AAInfo; + MDNode *Range = nullptr; + while (consumeIfPresent(MIToken::comma)) { + switch (Token.kind()) { + case MIToken::kw_align: + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::kw_addrspace: + if (parseAddrspace(Ptr.AddrSpace)) + return true; + break; + case MIToken::md_tbaa: + lex(); + if (parseMDNode(AAInfo.TBAA)) + return true; + break; + case MIToken::md_alias_scope: + lex(); + if (parseMDNode(AAInfo.Scope)) + return true; + break; + case MIToken::md_noalias: + lex(); + if (parseMDNode(AAInfo.NoAlias)) + return true; + break; + case MIToken::md_range: + lex(); + if (parseMDNode(Range)) + return true; + break; + // TODO: Report an error on duplicate metadata nodes. + default: + return error("expected 'align' or '!tbaa' or '!alias.scope' or " + "'!noalias' or '!range'"); + } + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, + SSID, Order, FailureOrder); + return false; +} + +bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) { + assert((Token.is(MIToken::kw_pre_instr_symbol) || + Token.is(MIToken::kw_post_instr_symbol)) && + "Invalid token for a pre- post-instruction symbol!"); + lex(); + if (Token.isNot(MIToken::MCSymbol)) + return error("expected a symbol after 'pre-instr-symbol'"); + Symbol = getOrCreateMCSymbol(Token.stringValue()); + lex(); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + return false; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + return false; +} + +static void initSlots2BasicBlocks( + const Function &F, + DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (auto &BB : F) { + if (BB.hasName()) + continue; + int Slot = MST.getLocalSlot(&BB); + if (Slot == -1) + continue; + Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB)); + } +} + +static const BasicBlock *getIRBlockFromSlot( + unsigned Slot, + const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + auto BlockInfo = Slots2BasicBlocks.find(Slot); + if (BlockInfo == Slots2BasicBlocks.end()) + return nullptr; + return BlockInfo->second; +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot) { + if (Slots2BasicBlocks.empty()) + initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks); + return getIRBlockFromSlot(Slot, Slots2BasicBlocks); +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { + if (&F == &MF.getFunction()) + return getIRBlock(Slot); + DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks; + initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); + return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); +} + +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap<unsigned, const Value *> &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. +static void initSlots2Values(const Function &F, + DenseMap<unsigned, const Value *> &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value *MIParser::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + +MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { + // FIXME: Currently we can't recognize temporary or local symbols and call all + // of the appropriate forms to create them. However, this handles basic cases + // well as most of the special aspects are recognized by a prefix on their + // name, and the input names should already be unique. For test cases, keeping + // the symbol name out of the symbol table isn't terribly important. + return MF.getContext().getOrCreateSymbol(Name); +} + +bool MIParser::parseStringConstant(std::string &Result) { + if (Token.isNot(MIToken::StringConstant)) + return error("expected string constant"); + Result = Token.stringValue(); + lex(); + return false; +} + +bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, + StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots); +} + +bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseBasicBlocks(); +} + +bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB); +} + +bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg); +} + +bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg); +} + +bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, + VRegInfo *&Info, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info); +} + +bool llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS, + int &FI, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI); +} + +bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, + MDNode *&Node, StringRef Src, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); +} diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp new file mode 100644 index 000000000000..55fac93d8991 --- /dev/null +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -0,0 +1,954 @@ +//===- MIRParser.cpp - MIR serialization format parser implementation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that parses the optional LLVM IR and machine +// functions that are stored in MIR files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Target/TargetMachine.h" +#include <memory> + +using namespace llvm; + +namespace llvm { + +/// This class implements the parsing of LLVM IR that's embedded inside a MIR +/// file. +class MIRParserImpl { + SourceMgr SM; + yaml::Input In; + StringRef Filename; + LLVMContext &Context; + SlotMapping IRSlots; + std::unique_ptr<PerTargetMIParsingState> Target; + + /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are + /// created and inserted into the given module when this is true. + bool NoLLVMIR = false; + /// True when a well formed MIR file does not contain any MIR/machine function + /// parts. + bool NoMIRDocuments = false; + +public: + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, + StringRef Filename, LLVMContext &Context); + + void reportDiagnostic(const SMDiagnostic &Diag); + + /// Report an error with the given message at unknown location. + /// + /// Always returns true. + bool error(const Twine &Message); + + /// Report an error with the given message at the given location. + /// + /// Always returns true. + bool error(SMLoc Loc, const Twine &Message); + + /// Report a given error with the location translated from the location in an + /// embedded string literal to a location in the MIR file. + /// + /// Always returns true. + bool error(const SMDiagnostic &Error, SMRange SourceRange); + + /// Try to parse the optional LLVM module and the machine functions in the MIR + /// file. + /// + /// Return null if an error occurred. + std::unique_ptr<Module> parseIRModule(); + + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); + + /// Parse the machine function in the current YAML document. + /// + /// + /// Return true if an error occurred. + bool parseMachineFunction(Module &M, MachineModuleInfo &MMI); + + /// Initialize the machine function to the state that's described in the MIR + /// file. + /// + /// Return true if error occurred. + bool initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF); + + bool parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); + + bool setupRegisterInfo(const PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); + + bool initializeFrameInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); + + bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); + + bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, + bool IsRestored, int FrameIdx); + + template <typename T> + bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, + const T &Object, + int FrameIdx); + + bool initializeConstantPool(PerFunctionMIParsingState &PFS, + MachineConstantPool &ConstantPool, + const yaml::MachineFunction &YamlMF); + + bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineJumpTable &YamlJTI); + +private: + bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, + const yaml::StringValue &Source); + + bool parseMBBReference(PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, + const yaml::StringValue &Source); + + /// Return a MIR diagnostic converted from an MI string diagnostic. + SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, + SMRange SourceRange); + + /// Return a MIR diagnostic converted from a diagnostic located in a YAML + /// block scalar string. + SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange); + + void computeFunctionProperties(MachineFunction &MF); +}; + +} // end namespace llvm + +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag); +} + +MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, + StringRef Filename, LLVMContext &Context) + : SM(), + In(SM.getMemoryBuffer( + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), + Context(Context) { + In.setContext(&In); +} + +bool MIRParserImpl::error(const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SMDiagnostic(Filename, SourceMgr::DK_Error, Message.str()))); + return true; +} + +bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message))); + return true; +} + +bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) { + assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error"); + reportDiagnostic(diagFromMIStringDiag(Error, SourceRange)); + return true; +} + +void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { + DiagnosticSeverity Kind; + switch (Diag.getKind()) { + case SourceMgr::DK_Error: + Kind = DS_Error; + break; + case SourceMgr::DK_Warning: + Kind = DS_Warning; + break; + case SourceMgr::DK_Note: + Kind = DS_Note; + break; + case SourceMgr::DK_Remark: + llvm_unreachable("remark unexpected"); + break; + } + Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); +} + +std::unique_ptr<Module> MIRParserImpl::parseIRModule() { + if (!In.setCurrentDocument()) { + if (In.error()) + return nullptr; + // Create an empty module when the MIR file is empty. + NoMIRDocuments = true; + return std::make_unique<Module>(Filename, Context); + } + + std::unique_ptr<Module> M; + // Parse the block scalar manually so that we can return unique pointer + // without having to go trough YAML traits. + if (const auto *BSN = + dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) { + SMDiagnostic Error; + M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, + Context, &IRSlots, /*UpgradeDebugInfo=*/false); + if (!M) { + reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); + return nullptr; + } + In.nextDocument(); + if (!In.setCurrentDocument()) + NoMIRDocuments = true; + } else { + // Create an new, empty module. + M = std::make_unique<Module>(Filename, Context); + NoLLVMIR = true; + } + return M; +} + +bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + if (NoMIRDocuments) + return false; + + // Parse the machine functions. + do { + if (parseMachineFunction(M, MMI)) + return true; + In.nextDocument(); + } while (In.setCurrentDocument()); + + return false; +} + +/// Create an empty function with the given name. +static Function *createDummyFunction(StringRef Name, Module &M) { + auto &Context = M.getContext(); + Function *F = + Function::Create(FunctionType::get(Type::getVoidTy(Context), false), + Function::ExternalLinkage, Name, M); + BasicBlock *BB = BasicBlock::Create(Context, "entry", F); + new UnreachableInst(Context, BB); + return F; +} + +bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { + // Parse the yaml. + yaml::MachineFunction YamlMF; + yaml::EmptyContext Ctx; + + const LLVMTargetMachine &TM = MMI.getTarget(); + YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>( + TM.createDefaultFuncInfoYAML()); + + yaml::yamlize(In, YamlMF, false, Ctx); + if (In.error()) + return true; + + // Search for the corresponding IR function. + StringRef FunctionName = YamlMF.Name; + Function *F = M.getFunction(FunctionName); + if (!F) { + if (NoLLVMIR) { + F = createDummyFunction(FunctionName, M); + } else { + return error(Twine("function '") + FunctionName + + "' isn't defined in the provided LLVM IR"); + } + } + if (MMI.getMachineFunction(*F) != nullptr) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + if (initializeMachineFunction(YamlMF, MF)) + return true; + + return false; +} + +static bool isSSA(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = Register::index2VirtReg(I); + if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) + return false; + } + return true; +} + +void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { + MachineFunctionProperties &Properties = MF.getProperties(); + + bool HasPHI = false; + bool HasInlineAsm = false; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (MI.isPHI()) + HasPHI = true; + if (MI.isInlineAsm()) + HasInlineAsm = true; + } + } + if (!HasPHI) + Properties.set(MachineFunctionProperties::Property::NoPHIs); + MF.setHasInlineAsm(HasInlineAsm); + + if (isSSA(MF)) + Properties.set(MachineFunctionProperties::Property::IsSSA); + else + Properties.reset(MachineFunctionProperties::Property::IsSSA); + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.getNumVirtRegs() == 0) + Properties.set(MachineFunctionProperties::Property::NoVRegs); +} + +bool MIRParserImpl::initializeCallSiteInfo( + PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; + SMDiagnostic Error; + const LLVMTargetMachine &TM = MF.getTarget(); + for (auto YamlCSInfo : YamlMF.CallSitesInfo) { + yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation; + if (MILoc.BlockNum >= MF.size()) + return error(Twine(MF.getName()) + + Twine(" call instruction block out of range.") + + " Unable to reference bb:" + Twine(MILoc.BlockNum)); + auto CallB = std::next(MF.begin(), MILoc.BlockNum); + if (MILoc.Offset >= CallB->size()) + return error(Twine(MF.getName()) + + Twine(" call instruction offset out of range.") + + " Unable to reference instruction at bb: " + + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset)); + auto CallI = std::next(CallB->instr_begin(), MILoc.Offset); + if (!CallI->isCall(MachineInstr::IgnoreBundle)) + return error(Twine(MF.getName()) + + Twine(" call site info should reference call " + "instruction. Instruction at bb:") + + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) + + " is not a call instruction"); + MachineFunction::CallSiteInfo CSInfo; + for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) { + unsigned Reg = 0; + if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error)) + return error(Error, ArgRegPair.Reg.SourceRange); + CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); + } + + if (TM.Options.EnableDebugEntryValues) + MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); + } + + if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues) + return error(Twine("Call site info provided but not used")); + return false; +} + +bool +MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF) { + // TODO: Recreate the machine function. + if (Target) { + // Avoid clearing state if we're using the same subtarget again. + Target->setTarget(MF.getSubtarget()); + } else { + Target.reset(new PerTargetMIParsingState(MF.getSubtarget())); + } + + if (YamlMF.Alignment) + MF.setAlignment(Align(YamlMF.Alignment)); + MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); + MF.setHasWinCFI(YamlMF.HasWinCFI); + + if (YamlMF.Legalized) + MF.getProperties().set(MachineFunctionProperties::Property::Legalized); + if (YamlMF.RegBankSelected) + MF.getProperties().set( + MachineFunctionProperties::Property::RegBankSelected); + if (YamlMF.Selected) + MF.getProperties().set(MachineFunctionProperties::Property::Selected); + if (YamlMF.FailedISel) + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + + PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target); + if (parseRegisterInfo(PFS, YamlMF)) + return true; + if (!YamlMF.Constants.empty()) { + auto *ConstantPool = MF.getConstantPool(); + assert(ConstantPool && "Constant pool must be created"); + if (initializeConstantPool(PFS, *ConstantPool, YamlMF)) + return true; + } + + StringRef BlockStr = YamlMF.Body.Value.Value; + SMDiagnostic Error; + SourceMgr BlockSM; + BlockSM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(BlockStr, "",/*RequiresNullTerminator=*/false), + SMLoc()); + PFS.SM = &BlockSM; + if (parseMachineBasicBlockDefinitions(PFS, BlockStr, Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; + } + PFS.SM = &SM; + + // Initialize the frame information after creating all the MBBs so that the + // MBB references in the frame information can be resolved. + if (initializeFrameInfo(PFS, YamlMF)) + return true; + // Initialize the jump table after creating all the MBBs so that the MBB + // references can be resolved. + if (!YamlMF.JumpTableInfo.Entries.empty() && + initializeJumpTableInfo(PFS, YamlMF.JumpTableInfo)) + return true; + // Parse the machine instructions after creating all of the MBBs so that the + // parser can resolve the MBB references. + StringRef InsnStr = YamlMF.Body.Value.Value; + SourceMgr InsnSM; + InsnSM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(InsnStr, "", /*RequiresNullTerminator=*/false), + SMLoc()); + PFS.SM = &InsnSM; + if (parseMachineInstructions(PFS, InsnStr, Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; + } + PFS.SM = &SM; + + if (setupRegisterInfo(PFS, YamlMF)) + return true; + + if (YamlMF.MachineFuncInfo) { + const LLVMTargetMachine &TM = MF.getTarget(); + // Note this is called after the initial constructor of the + // MachineFunctionInfo based on the MachineFunction, which may depend on the + // IR. + + SMRange SrcRange; + if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error, + SrcRange)) { + return error(Error, SrcRange); + } + } + + // Set the reserved registers after parsing MachineFuncInfo. The target may + // have been recording information used to select the reserved registers + // there. + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.freezeReservedRegs(MF); + + computeFunctionProperties(MF); + + if (initializeCallSiteInfo(PFS, YamlMF)) + return false; + + MF.getSubtarget().mirFileLoaded(MF); + + MF.verify(); + return false; +} + +bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + assert(RegInfo.tracksLiveness()); + if (!YamlMF.TracksRegLiveness) + RegInfo.invalidateLiveness(); + + SMDiagnostic Error; + // Parse the virtual register information. + for (const auto &VReg : YamlMF.VirtualRegisters) { + VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value); + if (Info.Explicit) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + Info.Explicit = true; + + if (StringRef(VReg.Class.Value).equals("_")) { + Info.Kind = VRegInfo::GENERIC; + Info.D.RegBank = nullptr; + } else { + const auto *RC = Target->getRegClass(VReg.Class.Value); + if (RC) { + Info.Kind = VRegInfo::NORMAL; + Info.D.RC = RC; + } else { + const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value); + if (!RegBank) + return error( + VReg.Class.SourceRange.Start, + Twine("use of undefined register class or register bank '") + + VReg.Class.Value + "'"); + Info.Kind = VRegInfo::REGBANK; + Info.D.RegBank = RegBank; + } + } + + if (!VReg.PreferredRegister.Value.empty()) { + if (Info.Kind != VRegInfo::NORMAL) + return error(VReg.Class.SourceRange.Start, + Twine("preferred register can only be set for normal vregs")); + + if (parseRegisterReference(PFS, Info.PreferredReg, + VReg.PreferredRegister.Value, Error)) + return error(Error, VReg.PreferredRegister.SourceRange); + } + } + + // Parse the liveins. + for (const auto &LiveIn : YamlMF.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error)) + return error(Error, LiveIn.Register.SourceRange); + unsigned VReg = 0; + if (!LiveIn.VirtualRegister.Value.empty()) { + VRegInfo *Info; + if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value, + Error)) + return error(Error, LiveIn.VirtualRegister.SourceRange); + VReg = Info->VReg; + } + RegInfo.addLiveIn(Reg, VReg); + } + + // Parse the callee saved registers (Registers that will + // be saved for the caller). + if (YamlMF.CalleeSavedRegisters) { + SmallVector<MCPhysReg, 16> CalleeSavedRegisters; + for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + unsigned Reg = 0; + if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error)) + return error(Error, RegSource.SourceRange); + CalleeSavedRegisters.push_back(Reg); + } + RegInfo.setCalleeSavedRegs(CalleeSavedRegisters); + } + + return false; +} + +bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Error = false; + // Create VRegs + auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) { + unsigned Reg = Info.VReg; + switch (Info.Kind) { + case VRegInfo::UNKNOWN: + error(Twine("Cannot determine class/bank of virtual register ") + + Name + " in function '" + MF.getName() + "'"); + Error = true; + break; + case VRegInfo::NORMAL: + MRI.setRegClass(Reg, Info.D.RC); + if (Info.PreferredReg != 0) + MRI.setSimpleHint(Reg, Info.PreferredReg); + break; + case VRegInfo::GENERIC: + break; + case VRegInfo::REGBANK: + MRI.setRegBank(Reg, *Info.D.RegBank); + break; + } + }; + + for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end(); + I != E; I++) { + const VRegInfo &Info = *I->second; + populateVRegInfo(Info, Twine(I->first())); + } + + for (auto P : PFS.VRegInfos) { + const VRegInfo &Info = *P.second; + populateVRegInfo(Info, Twine(P.first)); + } + + // Compute MachineRegisterInfo::UsedPhysRegMask + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); + } + } + } + + return Error; +} + +bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const Function &F = MF.getFunction(); + const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; + MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); + MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); + MFI.setHasStackMap(YamlMFI.HasStackMap); + MFI.setHasPatchPoint(YamlMFI.HasPatchPoint); + MFI.setStackSize(YamlMFI.StackSize); + MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); + if (YamlMFI.MaxAlignment) + MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); + MFI.setAdjustsStack(YamlMFI.AdjustsStack); + MFI.setHasCalls(YamlMFI.HasCalls); + if (YamlMFI.MaxCallFrameSize != ~0u) + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters); + MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); + MFI.setHasVAStart(YamlMFI.HasVAStart); + MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + MFI.setLocalFrameSize(YamlMFI.LocalFrameSize); + if (!YamlMFI.SavePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint)) + return true; + MFI.setSavePoint(MBB); + } + if (!YamlMFI.RestorePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(PFS, MBB, YamlMFI.RestorePoint)) + return true; + MFI.setRestorePoint(MBB); + } + + std::vector<CalleeSavedInfo> CSIInfo; + // Initialize the fixed frame objects. + for (const auto &Object : YamlMF.FixedStackObjects) { + int ObjectIdx; + if (Object.Type != yaml::FixedMachineStackObject::SpillSlot) + ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset, + Object.IsImmutable, Object.IsAliased); + else + ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); + + if (!TFI->isSupportedStackID(Object.StackID)) + return error(Object.ID.SourceRange.Start, + Twine("StackID is not supported by target")); + MFI.setStackID(ObjectIdx, Object.StackID); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, + ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of fixed stack object '%fixed-stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, + Object.CalleeSavedRestored, ObjectIdx)) + return true; + if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) + return true; + } + + // Initialize the ordinary frame objects. + for (const auto &Object : YamlMF.StackObjects) { + int ObjectIdx; + const AllocaInst *Alloca = nullptr; + const yaml::StringValue &Name = Object.Name; + if (!Name.Value.empty()) { + Alloca = dyn_cast_or_null<AllocaInst>( + F.getValueSymbolTable()->lookup(Name.Value)); + if (!Alloca) + return error(Name.SourceRange.Start, + "alloca instruction named '" + Name.Value + + "' isn't defined in the function '" + F.getName() + + "'"); + } + if (!TFI->isSupportedStackID(Object.StackID)) + return error(Object.ID.SourceRange.Start, + Twine("StackID is not supported by target")); + if (Object.Type == yaml::MachineStackObject::VariableSized) + ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); + else + ObjectIdx = MFI.CreateStackObject( + Object.Size, Object.Alignment, + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca, + Object.StackID); + MFI.setObjectOffset(ObjectIdx, Object.Offset); + + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of stack object '%stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, + Object.CalleeSavedRestored, ObjectIdx)) + return true; + if (Object.LocalOffset) + MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) + return true; + } + MFI.setCalleeSavedInfo(CSIInfo); + if (!CSIInfo.empty()) + MFI.setCalleeSavedInfoValid(true); + + // Initialize the various stack object references after initializing the + // stack objects. + if (!YamlMFI.StackProtector.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(PFS, FI, YamlMFI.StackProtector.Value, Error)) + return error(Error, YamlMFI.StackProtector.SourceRange); + MFI.setStackProtectorIndex(FI); + } + return false; +} + +bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) { + if (RegisterSource.Value.empty()) + return false; + unsigned Reg = 0; + SMDiagnostic Error; + if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error)) + return error(Error, RegisterSource.SourceRange); + CalleeSavedInfo CSI(Reg, FrameIdx); + CSI.setRestored(IsRestored); + CSIInfo.push_back(CSI); + return false; +} + +/// Verify that given node is of a certain type. Return true on error. +template <typename T> +static bool typecheckMDNode(T *&Result, MDNode *Node, + const yaml::StringValue &Source, + StringRef TypeString, MIRParserImpl &Parser) { + if (!Node) + return false; + Result = dyn_cast<T>(Node); + if (!Result) + return Parser.error(Source.SourceRange.Start, + "expected a reference to a '" + TypeString + + "' metadata node"); + return false; +} + +template <typename T> +bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, + const T &Object, int FrameIdx) { + // Debug information can only be attached to stack objects; Fixed stack + // objects aren't supported. + MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; + if (parseMDNode(PFS, Var, Object.DebugVar) || + parseMDNode(PFS, Expr, Object.DebugExpr) || + parseMDNode(PFS, Loc, Object.DebugLoc)) + return true; + if (!Var && !Expr && !Loc) + return false; + DILocalVariable *DIVar = nullptr; + DIExpression *DIExpr = nullptr; + DILocation *DILoc = nullptr; + if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) || + typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || + typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) + return true; + PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc); + return false; +} + +bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS, + MDNode *&Node, const yaml::StringValue &Source) { + if (Source.Value.empty()) + return false; + SMDiagnostic Error; + if (llvm::parseMDNode(PFS, Node, Source.Value, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) { + DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots; + const MachineFunction &MF = PFS.MF; + const auto &M = *MF.getFunction().getParent(); + SMDiagnostic Error; + for (const auto &YamlConstant : YamlMF.Constants) { + if (YamlConstant.IsTargetSpecific) + // FIXME: Support target-specific constant pools + return error(YamlConstant.Value.SourceRange.Start, + "Can't parse target-specific constant pool entries yet"); + const Constant *Value = dyn_cast_or_null<Constant>( + parseConstantValue(YamlConstant.Value.Value, Error, M)); + if (!Value) + return error(Error, YamlConstant.Value.SourceRange); + unsigned Alignment = + YamlConstant.Alignment + ? YamlConstant.Alignment + : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) + .second) + return error(YamlConstant.ID.SourceRange.Start, + Twine("redefinition of constant pool item '%const.") + + Twine(YamlConstant.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineJumpTable &YamlJTI) { + MachineJumpTableInfo *JTI = PFS.MF.getOrCreateJumpTableInfo(YamlJTI.Kind); + for (const auto &Entry : YamlJTI.Entries) { + std::vector<MachineBasicBlock *> Blocks; + for (const auto &MBBSource : Entry.Blocks) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(PFS, MBB, MBBSource.Value)) + return true; + Blocks.push_back(MBB); + } + unsigned Index = JTI->createJumpTableIndex(Blocks); + if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index)) + .second) + return error(Entry.ID.SourceRange.Start, + Twine("redefinition of jump table entry '%jump-table.") + + Twine(Entry.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, + const yaml::StringValue &Source) { + SMDiagnostic Error; + if (llvm::parseMBBReference(PFS, MBB, Source.Value, Error)) + return error(Error, Source.SourceRange); + return false; +} + +SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { + assert(SourceRange.isValid() && "Invalid source range"); + SMLoc Loc = SourceRange.Start; + bool HasQuote = Loc.getPointer() < SourceRange.End.getPointer() && + *Loc.getPointer() == '\''; + // Translate the location of the error from the location in the MI string to + // the corresponding location in the MIR file. + Loc = Loc.getFromPointer(Loc.getPointer() + Error.getColumnNo() + + (HasQuote ? 1 : 0)); + + // TODO: Translate any source ranges as well. + return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None, + Error.getFixIts()); +} + +SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { + assert(SourceRange.isValid()); + + // Translate the location of the error from the location in the llvm IR string + // to the corresponding location in the MIR file. + auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start); + unsigned Line = LineAndColumn.first + Error.getLineNo() - 1; + unsigned Column = Error.getColumnNo(); + StringRef LineStr = Error.getLineContents(); + SMLoc Loc = Error.getLoc(); + + // Get the full line and adjust the column number by taking the indentation of + // LLVM IR into account. + for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E; + L != E; ++L) { + if (L.line_number() == Line) { + LineStr = *L; + Loc = SMLoc::getFromPointer(LineStr.data()); + auto Indent = LineStr.find(Error.getLineContents()); + if (Indent != StringRef::npos) + Column += Indent; + break; + } + } + + return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(), + Error.getMessage(), LineStr, Error.getRanges(), + Error.getFixIts()); +} + +MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) + : Impl(std::move(Impl)) {} + +MIRParser::~MIRParser() {} + +std::unique_ptr<Module> MIRParser::parseIRModule() { + return Impl->parseIRModule(); +} + +bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + return Impl->parseMachineFunctions(M, MMI); +} + +std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename, + SMDiagnostic &Error, + LLVMContext &Context) { + auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = FileOrErr.getError()) { + Error = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + EC.message()); + return nullptr; + } + return createMIRParser(std::move(FileOrErr.get()), Context); +} + +std::unique_ptr<MIRParser> +llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, + LLVMContext &Context) { + auto Filename = Contents->getBufferIdentifier(); + if (Context.shouldDiscardValueNames()) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, + SMDiagnostic( + Filename, SourceMgr::DK_Error, + "Can't read MIR with a Context that discards named Values"))); + return nullptr; + } + return std::make_unique<MIRParser>( + std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); +} |