summaryrefslogtreecommitdiff
path: root/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp')
-rw-r--r--lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp778
1 files changed, 463 insertions, 315 deletions
diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 2d92b93ca7047..0a5908f437906 100644
--- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -18,13 +18,14 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
@@ -34,27 +35,10 @@ using namespace llvm;
namespace {
-// We store register types as SimpleValueType to retain SIMD layout
-// information, but must also be able to supply them as the (unnamed)
-// register enum from WebAssemblyRegisterInfo.td/.inc.
-static unsigned MVTToWasmReg(MVT::SimpleValueType Type) {
- switch(Type) {
- case MVT::i32: return WebAssembly::I32_0;
- case MVT::i64: return WebAssembly::I64_0;
- case MVT::f32: return WebAssembly::F32_0;
- case MVT::f64: return WebAssembly::F64_0;
- case MVT::v16i8: return WebAssembly::V128_0;
- case MVT::v8i16: return WebAssembly::V128_0;
- case MVT::v4i32: return WebAssembly::V128_0;
- case MVT::v4f32: return WebAssembly::V128_0;
- default: return MVT::INVALID_SIMPLE_VALUE_TYPE;
- }
-}
-
/// WebAssemblyOperand - Instances of this class represent the operands in a
/// parsed WASM machine instruction.
struct WebAssemblyOperand : public MCParsedAsmOperand {
- enum KindTy { Token, Local, Stack, Integer, Float, Symbol } Kind;
+ enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
SMLoc StartLoc, EndLoc;
@@ -62,19 +46,6 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
StringRef Tok;
};
- struct RegOp {
- // This is a (virtual) local or stack register represented as 0..
- unsigned RegNo;
- // In most targets, the register number also encodes the type, but for
- // wasm we have to track that seperately since we have an unbounded
- // number of registers.
- // This has the unfortunate side effect that we supply a different value
- // to the table-gen matcher at different times in the process (when it
- // calls getReg() or addRegOperands().
- // TODO: While this works, it feels brittle. and would be nice to clean up.
- MVT::SimpleValueType Type;
- };
-
struct IntOp {
int64_t Val;
};
@@ -87,37 +58,45 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
const MCExpr *Exp;
};
+ struct BrLOp {
+ std::vector<unsigned> List;
+ };
+
union {
struct TokOp Tok;
- struct RegOp Reg;
struct IntOp Int;
struct FltOp Flt;
struct SymOp Sym;
+ struct BrLOp BrL;
};
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
- : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
- WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, RegOp R)
- : Kind(K), StartLoc(Start), EndLoc(End), Reg(R) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
- : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
- : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
- : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
+
+ ~WebAssemblyOperand() {
+ if (isBrList())
+ BrL.~BrLOp();
+ }
bool isToken() const override { return Kind == Token; }
- bool isImm() const override { return Kind == Integer ||
- Kind == Float ||
- Kind == Symbol; }
- bool isReg() const override { return Kind == Local || Kind == Stack; }
+ bool isImm() const override {
+ return Kind == Integer || Kind == Float || Kind == Symbol;
+ }
bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+ bool isBrList() const { return Kind == BrList; }
unsigned getReg() const override {
- assert(isReg());
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to match the type of register, see RegOp above.
- return MVTToWasmReg(Reg.Type);
+ llvm_unreachable("Assembly inspects a register operand");
+ return 0;
}
StringRef getToken() const {
@@ -128,19 +107,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- assert(isReg() && "Not a register operand!");
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to output the actual register index, see RegOp above.
- unsigned R = Reg.RegNo;
- if (Kind == Stack) {
- // A stack register is represented as a large negative number.
- // See WebAssemblyRegNumbering::runOnMachineFunction and
- // getWARegStackId for why this | is needed.
- R |= INT32_MIN;
- }
- Inst.addOperand(MCOperand::createReg(R));
+ void addRegOperands(MCInst &, unsigned) const {
+ // Required by the assembly matcher.
+ llvm_unreachable("Assembly matcher creates register operands");
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -155,17 +124,17 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
llvm_unreachable("Should be immediate or symbol!");
}
+ void addBrListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isBrList() && "Invalid BrList!");
+ for (auto Br : BrL.List)
+ Inst.addOperand(MCOperand::createImm(Br));
+ }
+
void print(raw_ostream &OS) const override {
switch (Kind) {
case Token:
OS << "Tok:" << Tok.Tok;
break;
- case Local:
- OS << "Loc:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
- case Stack:
- OS << "Stk:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
case Integer:
OS << "Int:" << Int.Val;
break;
@@ -175,6 +144,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
case Symbol:
OS << "Sym:" << Sym.Exp;
break;
+ case BrList:
+ OS << "BrList:" << BrL.List.size();
+ break;
}
}
};
@@ -182,352 +154,526 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
- // These are for the current function being parsed:
- // These are vectors since register assignments are so far non-sparse.
- // Replace by map if necessary.
- std::vector<MVT::SimpleValueType> LocalTypes;
- std::vector<MVT::SimpleValueType> StackTypes;
- MCSymbol *LastLabel;
+
+ // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+
+ // Order of labels, directives and instructions in a .s file has no
+ // syntactical enforcement. This class is a callback from the actual parser,
+ // and yet we have to be feeding data to the streamer in a very particular
+ // order to ensure a correct binary encoding that matches the regular backend
+ // (the streamer does not enforce this). This "state machine" enum helps
+ // guarantee that correct order.
+ enum ParserState {
+ FileStart,
+ Label,
+ FunctionStart,
+ FunctionLocals,
+ Instructions,
+ } CurrentState = FileStart;
+
+ // For ensuring blocks are properly nested.
+ enum NestingType {
+ Function,
+ Block,
+ Loop,
+ Try,
+ If,
+ Else,
+ Undefined,
+ };
+ std::vector<NestingType> NestingStack;
+
+ // We track this to see if a .functype following a label is the same,
+ // as this is how we recognize the start of a function.
+ MCSymbol *LastLabel = nullptr;
public:
- WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
- const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
- Lexer(Parser.getLexer()), LastLabel(nullptr) {
+ WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
+ Lexer(Parser.getLexer()) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
#define GET_ASSEMBLER_HEADER
#include "WebAssemblyGenAsmMatcher.inc"
// TODO: This is required to be implemented, but appears unused.
- bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/,
- SMLoc &/*EndLoc*/) override {
+ bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
+ SMLoc & /*EndLoc*/) override {
llvm_unreachable("ParseRegister is not implemented.");
}
- bool Error(const StringRef &msg, const AsmToken &tok) {
- return Parser.Error(tok.getLoc(), msg + tok.getString());
+ bool error(const Twine &Msg, const AsmToken &Tok) {
+ return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
+ }
+
+ bool error(const Twine &Msg) {
+ return Parser.Error(Lexer.getTok().getLoc(), Msg);
+ }
+
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
+
+ std::pair<StringRef, StringRef> nestingString(NestingType NT) {
+ switch (NT) {
+ case Function:
+ return {"function", "end_function"};
+ case Block:
+ return {"block", "end_block"};
+ case Loop:
+ return {"loop", "end_loop"};
+ case Try:
+ return {"try", "end_try"};
+ case If:
+ return {"if", "end_if"};
+ case Else:
+ return {"else", "end_if"};
+ default:
+ llvm_unreachable("unknown NestingType");
+ }
+ }
+
+ void push(NestingType NT) { NestingStack.push_back(NT); }
+
+ bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
+ if (NestingStack.empty())
+ return error(Twine("End of block construct with no start: ") + Ins);
+ auto Top = NestingStack.back();
+ if (Top != NT1 && Top != NT2)
+ return error(Twine("Block construct type mismatch, expected: ") +
+ nestingString(Top).second + ", instead got: " + Ins);
+ NestingStack.pop_back();
+ return false;
+ }
+
+ bool ensureEmptyNestingStack() {
+ auto err = !NestingStack.empty();
+ while (!NestingStack.empty()) {
+ error(Twine("Unmatched block construct(s) at function end: ") +
+ nestingString(NestingStack.back()).first);
+ NestingStack.pop_back();
+ }
+ return err;
}
- bool IsNext(AsmToken::TokenKind Kind) {
- auto ok = Lexer.is(Kind);
- if (ok) Parser.Lex();
- return ok;
+ bool isNext(AsmToken::TokenKind Kind) {
+ auto Ok = Lexer.is(Kind);
+ if (Ok)
+ Parser.Lex();
+ return Ok;
}
- bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
- if (!IsNext(Kind))
- return Error(std::string("Expected ") + KindName + ", instead got: ",
+ bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!isNext(Kind))
+ return error(std::string("Expected ") + KindName + ", instead got: ",
Lexer.getTok());
return false;
}
- MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
- // Derive type from .param .local decls, or the instruction itself.
- return StringSwitch<MVT::SimpleValueType>(RegType)
- .Case("i32", MVT::i32)
- .Case("i64", MVT::i64)
- .Case("f32", MVT::f32)
- .Case("f64", MVT::f64)
- .Case("i8x16", MVT::v16i8)
- .Case("i16x8", MVT::v8i16)
- .Case("i32x4", MVT::v4i32)
- .Case("f32x4", MVT::v4f32)
- .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ StringRef expectIdent() {
+ if (!Lexer.is(AsmToken::Identifier)) {
+ error("Expected identifier, got: ", Lexer.getTok());
+ return StringRef();
+ }
+ auto Name = Lexer.getTok().getString();
+ Parser.Lex();
+ return Name;
}
- MVT::SimpleValueType &GetType(
- std::vector<MVT::SimpleValueType> &Types, size_t i) {
- Types.resize(std::max(i + 1, Types.size()), MVT::INVALID_SIMPLE_VALUE_TYPE);
- return Types[i];
+ Optional<wasm::ValType> parseType(const StringRef &Type) {
+ // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
+ // "invalid" value.
+ if (Type == "i32")
+ return wasm::ValType::I32;
+ if (Type == "i64")
+ return wasm::ValType::I64;
+ if (Type == "f32")
+ return wasm::ValType::F32;
+ if (Type == "f64")
+ return wasm::ValType::F64;
+ if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
+ Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
+ Type == "f64x2")
+ return wasm::ValType::V128;
+ return Optional<wasm::ValType>();
}
- bool ParseReg(OperandVector &Operands, StringRef TypePrefix) {
- if (Lexer.is(AsmToken::Integer)) {
- auto &Local = Lexer.getTok();
- // This is a reference to a local, turn it into a virtual register.
- auto LocalNo = static_cast<unsigned>(Local.getIntVal());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Local, Local.getLoc(),
- Local.getEndLoc(),
- WebAssemblyOperand::RegOp{LocalNo,
- GetType(LocalTypes, LocalNo)}));
- Parser.Lex();
- } else if (Lexer.is(AsmToken::Identifier)) {
- auto &StackRegTok = Lexer.getTok();
- // These are push/pop/drop pseudo stack registers, which we turn
- // into virtual registers also. The stackify pass will later turn them
- // back into implicit stack references if possible.
- auto StackReg = StackRegTok.getString();
- auto StackOp = StackReg.take_while([](char c) { return isalpha(c); });
- auto Reg = StackReg.drop_front(StackOp.size());
- unsigned long long ParsedRegNo = 0;
- if (!Reg.empty() && getAsUnsignedInteger(Reg, 10, ParsedRegNo))
- return Error("Cannot parse stack register index: ", StackRegTok);
- unsigned RegNo = static_cast<unsigned>(ParsedRegNo);
- if (StackOp == "push") {
- // This defines a result, record register type.
- auto RegType = ParseRegType(TypePrefix);
- GetType(StackTypes, RegNo) = RegType;
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "pop") {
- // This uses a previously defined stack value.
- auto RegType = GetType(StackTypes, RegNo);
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "drop") {
- // This operand will be dropped, since it is part of an instruction
- // whose result is void.
- } else {
- return Error("Unknown stack register prefix: ", StackRegTok);
- }
+ WebAssembly::ExprType parseBlockType(StringRef ID) {
+ return StringSwitch<WebAssembly::ExprType>(ID)
+ .Case("i32", WebAssembly::ExprType::I32)
+ .Case("i64", WebAssembly::ExprType::I64)
+ .Case("f32", WebAssembly::ExprType::F32)
+ .Case("f64", WebAssembly::ExprType::F64)
+ .Case("v128", WebAssembly::ExprType::V128)
+ .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+ .Case("void", WebAssembly::ExprType::Void)
+ .Default(WebAssembly::ExprType::Invalid);
+ }
+
+ bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
+ while (Lexer.is(AsmToken::Identifier)) {
+ auto Type = parseType(Lexer.getTok().getString());
+ if (!Type)
+ return true;
+ Types.push_back(Type.getValue());
Parser.Lex();
- } else {
- return Error(
- "Expected identifier/integer following $, instead got: ",
- Lexer.getTok());
+ if (!isNext(AsmToken::Comma))
+ break;
}
- IsNext(AsmToken::Equal);
return false;
}
- void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
+ void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
auto &Int = Lexer.getTok();
int64_t Val = Int.getIntVal();
- if (IsNegative) Val = -Val;
+ if (IsNegative)
+ Val = -Val;
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Int.getLoc(),
- Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
+ WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
+ WebAssemblyOperand::IntOp{Val}));
Parser.Lex();
}
- bool ParseOperandStartingWithInteger(bool IsNegative,
- OperandVector &Operands,
- StringRef InstType) {
- ParseSingleInteger(IsNegative, Operands);
- if (Lexer.is(AsmToken::LParen)) {
- // Parse load/store operands of the form: offset($reg)align
- auto &LParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- LParen.getLoc(),
- LParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- LParen.getString()}));
- Parser.Lex();
- if (Expect(AsmToken::Dollar, "register")) return true;
- if (ParseReg(Operands, InstType)) return true;
- auto &RParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- RParen.getLoc(),
- RParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- RParen.getString()}));
- if (Expect(AsmToken::RParen, ")")) return true;
- if (Lexer.is(AsmToken::Integer)) {
- ParseSingleInteger(false, Operands);
+ bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
+ StringRef InstName) {
+ parseSingleInteger(IsNegative, Operands);
+ // FIXME: there is probably a cleaner way to do this.
+ auto IsLoadStore = InstName.startswith("load") ||
+ InstName.startswith("store") ||
+ InstName.startswith("atomic_load") ||
+ InstName.startswith("atomic_store");
+ if (IsLoadStore) {
+ // Parse load/store operands of the form: offset align
+ auto &Offset = Lexer.getTok();
+ if (Offset.is(AsmToken::Integer)) {
+ parseSingleInteger(false, Operands);
} else {
// Alignment not specified.
// FIXME: correctly derive a default from the instruction.
+ // We can't just call WebAssembly::GetDefaultP2Align since we don't have
+ // an opcode until after the assembly matcher.
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, RParen.getLoc(),
- RParen.getEndLoc(), WebAssemblyOperand::IntOp{0}));
+ WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
+ WebAssemblyOperand::IntOp{0}));
}
}
return false;
}
- bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name,
+ void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
+ WebAssembly::ExprType BT) {
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Integer, NameLoc, NameLoc,
+ WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc,
- SMLoc::getFromPointer(
- NameLoc.getPointer() + Name.size()),
- WebAssemblyOperand::TokOp{
- StringRef(NameLoc.getPointer(),
- Name.size())}));
+ // Note: Name does NOT point into the source code, but to a local, so
+ // use NameLoc instead.
+ Name = StringRef(NameLoc.getPointer(), Name.size());
+
+ // WebAssembly has instructions with / in them, which AsmLexer parses
+ // as separate tokens, so if we find such tokens immediately adjacent (no
+ // whitespace), expand the name to include them:
+ for (;;) {
+ auto &Sep = Lexer.getTok();
+ if (Sep.getLoc().getPointer() != Name.end() ||
+ Sep.getKind() != AsmToken::Slash)
+ break;
+ // Extend name with /
+ Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
+ Parser.Lex();
+ // We must now find another identifier, or error.
+ auto &Id = Lexer.getTok();
+ if (Id.getKind() != AsmToken::Identifier ||
+ Id.getLoc().getPointer() != Name.end())
+ return error("Incomplete instruction name: ", Id);
+ Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
+ Parser.Lex();
+ }
+
+ // Now construct the name as first operand.
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
+ WebAssemblyOperand::TokOp{Name}));
auto NamePair = Name.split('.');
// If no '.', there is no type prefix.
- if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second);
+ auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
+
+ // If this instruction is part of a control flow structure, ensure
+ // proper nesting.
+ bool ExpectBlockType = false;
+ if (BaseName == "block") {
+ push(Block);
+ ExpectBlockType = true;
+ } else if (BaseName == "loop") {
+ push(Loop);
+ ExpectBlockType = true;
+ } else if (BaseName == "try") {
+ push(Try);
+ ExpectBlockType = true;
+ } else if (BaseName == "if") {
+ push(If);
+ ExpectBlockType = true;
+ } else if (BaseName == "else") {
+ if (pop(BaseName, If))
+ return true;
+ push(Else);
+ } else if (BaseName == "catch") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "catch_all") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "end_if") {
+ if (pop(BaseName, If, Else))
+ return true;
+ } else if (BaseName == "end_try") {
+ if (pop(BaseName, Try))
+ return true;
+ } else if (BaseName == "end_loop") {
+ if (pop(BaseName, Loop))
+ return true;
+ } else if (BaseName == "end_block") {
+ if (pop(BaseName, Block))
+ return true;
+ } else if (BaseName == "end_function") {
+ if (pop(BaseName, Function) || ensureEmptyNestingStack())
+ return true;
+ }
+
while (Lexer.isNot(AsmToken::EndOfStatement)) {
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
- case AsmToken::Dollar: {
- Parser.Lex();
- if (ParseReg(Operands, NamePair.first)) return true;
- break;
- }
case AsmToken::Identifier: {
auto &Id = Lexer.getTok();
- const MCExpr *Val;
- SMLoc End;
- if (Parser.parsePrimaryExpr(Val, End))
- return Error("Cannot parse symbol: ", Lexer.getTok());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Symbol, Id.getLoc(),
- Id.getEndLoc(), WebAssemblyOperand::SymOp{Val}));
+ if (ExpectBlockType) {
+ // Assume this identifier is a block_type.
+ auto BT = parseBlockType(Id.getString());
+ if (BT == WebAssembly::ExprType::Invalid)
+ return error("Unknown block type: ", Id);
+ addBlockTypeOperand(Operands, NameLoc, BT);
+ Parser.Lex();
+ } else {
+ // Assume this identifier is a label.
+ const MCExpr *Val;
+ SMLoc End;
+ if (Parser.parsePrimaryExpr(Val, End))
+ return error("Cannot parse symbol: ", Lexer.getTok());
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::SymOp{Val}));
+ }
break;
}
case AsmToken::Minus:
Parser.Lex();
if (Lexer.isNot(AsmToken::Integer))
- return Error("Expected integer instead got: ", Lexer.getTok());
- if (ParseOperandStartingWithInteger(true, Operands, NamePair.first))
+ return error("Expected integer instead got: ", Lexer.getTok());
+ if (parseOperandStartingWithInteger(true, Operands, BaseName))
return true;
break;
case AsmToken::Integer:
- if (ParseOperandStartingWithInteger(false, Operands, NamePair.first))
+ if (parseOperandStartingWithInteger(false, Operands, BaseName))
return true;
break;
case AsmToken::Real: {
double Val;
if (Tok.getString().getAsDouble(Val, false))
- return Error("Cannot parse real: ", Tok);
+ return error("Cannot parse real: ", Tok);
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Float, Tok.getLoc(),
- Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
+ WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ break;
+ }
+ case AsmToken::LCurly: {
Parser.Lex();
+ auto Op = make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
+ if (!Lexer.is(AsmToken::RCurly))
+ for (;;) {
+ Op->BrL.List.push_back(Lexer.getTok().getIntVal());
+ expect(AsmToken::Integer, "integer");
+ if (!isNext(AsmToken::Comma))
+ break;
+ }
+ expect(AsmToken::RCurly, "}");
+ Operands.push_back(std::move(Op));
break;
}
default:
- return Error("Unexpected token in operand: ", Tok);
+ return error("Unexpected token in operand: ", Tok);
}
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Expect(AsmToken::Comma, ",")) return true;
- }
- }
- Parser.Lex();
- // Call instructions are vararg, but the tablegen matcher doesn't seem to
- // support that, so for now we strip these extra operands.
- // This is problematic if these arguments are not simple $pop stack
- // registers, since e.g. a local register would get lost, so we check for
- // this. This can be the case when using -disable-wasm-explicit-locals
- // which currently s2wasm requires.
- // TODO: Instead, we can move this code to MatchAndEmitInstruction below and
- // actually generate get_local instructions on the fly.
- // Or even better, improve the matcher to support vararg?
- auto IsIndirect = NamePair.second == "call_indirect";
- if (IsIndirect || NamePair.second == "call") {
- // Figure out number of fixed operands from the instruction.
- size_t CallOperands = 1; // The name token.
- if (!IsIndirect) CallOperands++; // The function index.
- if (!NamePair.first.empty()) CallOperands++; // The result register.
- if (Operands.size() > CallOperands) {
- // Ensure operands we drop are all $pop.
- for (size_t I = CallOperands; I < Operands.size(); I++) {
- auto Operand =
- reinterpret_cast<WebAssemblyOperand *>(Operands[I].get());
- if (Operand->Kind != WebAssemblyOperand::Stack)
- Parser.Error(NameLoc,
- "Call instruction has non-stack arguments, if this code was "
- "generated with -disable-wasm-explicit-locals please remove it");
- }
- // Drop unneeded operands.
- Operands.resize(CallOperands);
+ if (expect(AsmToken::Comma, ","))
+ return true;
}
}
- // Block instructions require a signature index, but these are missing in
- // assembly, so we add a dummy one explicitly (since we have no control
- // over signature tables here, we assume these will be regenerated when
- // the wasm module is generated).
- if (NamePair.second == "block" || NamePair.second == "loop") {
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, NameLoc,
- NameLoc, WebAssemblyOperand::IntOp{-1}));
- }
- // These don't specify the type, which has to derived from the local index.
- if (NamePair.second == "get_local" || NamePair.second == "tee_local") {
- if (Operands.size() >= 3 && Operands[1]->isReg() &&
- Operands[2]->isImm()) {
- auto Op1 = reinterpret_cast<WebAssemblyOperand *>(Operands[1].get());
- auto Op2 = reinterpret_cast<WebAssemblyOperand *>(Operands[2].get());
- auto Type = GetType(LocalTypes, static_cast<size_t>(Op2->Int.Val));
- Op1->Reg.Type = Type;
- GetType(StackTypes, Op1->Reg.RegNo) = Type;
- }
+ if (ExpectBlockType && Operands.size() == 1) {
+ // Support blocks with no operands as default to void.
+ addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);
}
+ Parser.Lex();
return false;
}
void onLabelParsed(MCSymbol *Symbol) override {
LastLabel = Symbol;
+ CurrentState = Label;
}
+ bool parseSignature(wasm::WasmSignature *Signature) {
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ if (expect(AsmToken::MinusGreater, "->"))
+ return true;
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Returns))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ return false;
+ }
+
+ // This function processes wasm-specific directives streamed to
+ // WebAssemblyTargetStreamer, all others go to the generic parser
+ // (see WasmAsmParser).
bool ParseDirective(AsmToken DirectiveID) override {
+ // This function has a really weird return value behavior that is different
+ // from all the other parsing functions:
+ // - return true && no tokens consumed -> don't know this directive / let
+ // the generic parser handle it.
+ // - return true && tokens consumed -> a parsing error occurred.
+ // - return false -> processed this directive successfully.
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
- auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
- *Out.getTargetStreamer());
- // TODO: we're just parsing the subset of directives we're interested in,
- // and ignoring ones we don't recognise. We should ideally verify
- // all directives here.
- if (DirectiveID.getString() == ".type") {
- // This could be the start of a function, check if followed by
- // "label,@function"
- if (!(IsNext(AsmToken::Identifier) &&
- IsNext(AsmToken::Comma) &&
- IsNext(AsmToken::At) &&
- Lexer.is(AsmToken::Identifier)))
- return Error("Expected label,@type declaration, got: ", Lexer.getTok());
- if (Lexer.getTok().getString() == "function") {
- // Track locals from start of function.
- LocalTypes.clear();
- StackTypes.clear();
- }
- Parser.Lex();
- //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
- } else if (DirectiveID.getString() == ".param" ||
- DirectiveID.getString() == ".local") {
- // Track the number of locals, needed for correct virtual register
- // assignment elsewhere.
- // Also output a directive to the streamer.
- std::vector<MVT> Params;
- std::vector<MVT> Locals;
- while (Lexer.is(AsmToken::Identifier)) {
- auto RegType = ParseRegType(Lexer.getTok().getString());
- if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true;
- LocalTypes.push_back(RegType);
- if (DirectiveID.getString() == ".param") {
- Params.push_back(RegType);
- } else {
- Locals.push_back(RegType);
- }
- Parser.Lex();
- if (!IsNext(AsmToken::Comma)) break;
+ auto &TOut =
+ reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+
+ // TODO: any time we return an error, at least one token must have been
+ // consumed, otherwise this will not signal an error to the caller.
+ if (DirectiveID.getString() == ".globaltype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto TypeTok = Lexer.getTok();
+ auto TypeName = expectIdent();
+ if (TypeName.empty())
+ return true;
+ auto Type = parseType(TypeName);
+ if (!Type)
+ return error("Unknown type in .globaltype directive: ", TypeTok);
+ // Now set this symbol with the correct type.
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
+ // And emit the directive again.
+ TOut.emitGlobalType(WasmSym);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".functype") {
+ // This code has to send things to the streamer similar to
+ // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
+ // TODO: would be good to factor this into a common function, but the
+ // assembler and backend really don't share any common code, and this code
+ // parses the locals separately.
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ if (CurrentState == Label && WasmSym == LastLabel) {
+ // This .functype indicates a start of a function.
+ if (ensureEmptyNestingStack())
+ return true;
+ CurrentState = FunctionStart;
+ push(Function);
}
- assert(LastLabel);
- TOut.emitParam(LastLabel, Params);
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseSignature(Signature.get()))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ TOut.emitFunctionType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".eventtype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ TOut.emitEventType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".local") {
+ if (CurrentState != FunctionStart)
+ return error(".local directive should follow the start of a function",
+ Lexer.getTok());
+ SmallVector<wasm::ValType, 4> Locals;
+ if (parseRegTypeList(Locals))
+ return true;
TOut.emitLocal(Locals);
- } else {
- // For now, ignore anydirective we don't recognize:
- while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex();
+ CurrentState = FunctionLocals;
+ return expect(AsmToken::EndOfStatement, "EOL");
}
- return Expect(AsmToken::EndOfStatement, "EOL");
+
+ return true; // We didn't process this directive.
}
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/,
- OperandVector &Operands,
- MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
+ if (CurrentState == FunctionStart) {
+ // This is the first instruction in a function, but we haven't seen
+ // a .local directive yet. The streamer requires locals to be encoded
+ // as a prelude to the instructions, so emit an empty list of locals
+ // here.
+ auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+ *Out.getTargetStreamer());
+ TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+ }
+ CurrentState = Instructions;
Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature:
- return Parser.Error(IDLoc,
- "instruction requires a WASM feature not currently enabled");
+ return Parser.Error(
+ IDLoc, "instruction requires a WASM feature not currently enabled");
case Match_MnemonicFail:
return Parser.Error(IDLoc, "invalid instruction");
case Match_NearMisses:
@@ -547,6 +693,8 @@ public:
}
llvm_unreachable("Implement any new match types added!");
}
+
+ void onEndOfFile() override { ensureEmptyNestingStack(); }
};
} // end anonymous namespace