diff options
Diffstat (limited to 'lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp')
| -rw-r--r-- | lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp | 778 | 
1 files changed, 463 insertions, 315 deletions
diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 2d92b93ca7047..0a5908f437906 100644 --- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -18,13 +18,14 @@  #include "MCTargetDesc/WebAssemblyTargetStreamer.h"  #include "WebAssembly.h"  #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h"  #include "llvm/MC/MCInst.h"  #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h"  #include "llvm/MC/MCSubtargetInfo.h"  #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolWasm.h"  #include "llvm/Support/Endian.h"  #include "llvm/Support/TargetRegistry.h" @@ -34,27 +35,10 @@ using namespace llvm;  namespace { -// We store register types as SimpleValueType to retain SIMD layout -// information, but must also be able to supply them as the (unnamed) -// register enum from WebAssemblyRegisterInfo.td/.inc. -static unsigned MVTToWasmReg(MVT::SimpleValueType Type) { -  switch(Type) { -    case MVT::i32: return WebAssembly::I32_0; -    case MVT::i64: return WebAssembly::I64_0; -    case MVT::f32: return WebAssembly::F32_0; -    case MVT::f64: return WebAssembly::F64_0; -    case MVT::v16i8: return WebAssembly::V128_0; -    case MVT::v8i16: return WebAssembly::V128_0; -    case MVT::v4i32: return WebAssembly::V128_0; -    case MVT::v4f32: return WebAssembly::V128_0; -    default: return MVT::INVALID_SIMPLE_VALUE_TYPE; -  } -} -  /// WebAssemblyOperand - Instances of this class represent the operands in a  /// parsed WASM machine instruction.  
struct WebAssemblyOperand : public MCParsedAsmOperand { -  enum KindTy { Token, Local, Stack, Integer, Float, Symbol } Kind; +  enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;    SMLoc StartLoc, EndLoc; @@ -62,19 +46,6 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {      StringRef Tok;    }; -  struct RegOp { -    // This is a (virtual) local or stack register represented as 0.. -    unsigned RegNo; -    // In most targets, the register number also encodes the type, but for -    // wasm we have to track that seperately since we have an unbounded -    // number of registers. -    // This has the unfortunate side effect that we supply a different value -    // to the table-gen matcher at different times in the process (when it -    // calls getReg() or addRegOperands(). -    // TODO: While this works, it feels brittle. and would be nice to clean up. -    MVT::SimpleValueType Type; -  }; -    struct IntOp {      int64_t Val;    }; @@ -87,37 +58,45 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {      const MCExpr *Exp;    }; +  struct BrLOp { +    std::vector<unsigned> List; +  }; +    union {      struct TokOp Tok; -    struct RegOp Reg;      struct IntOp Int;      struct FltOp Flt;      struct SymOp Sym; +    struct BrLOp BrL;    };    WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T) -    : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {} -  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, RegOp R) -    : Kind(K), StartLoc(Start), EndLoc(End), Reg(R) {} +      : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}    WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I) -    : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {} +      : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}    WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F) -    : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {} +      : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}    WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, 
SymOp S) -    : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {} +      : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {} +  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End) +      : Kind(K), StartLoc(Start), EndLoc(End), BrL() {} + +  ~WebAssemblyOperand() { +    if (isBrList()) +      BrL.~BrLOp(); +  }    bool isToken() const override { return Kind == Token; } -  bool isImm() const override { return Kind == Integer || -                                       Kind == Float || -                                       Kind == Symbol; } -  bool isReg() const override { return Kind == Local || Kind == Stack; } +  bool isImm() const override { +    return Kind == Integer || Kind == Float || Kind == Symbol; +  }    bool isMem() const override { return false; } +  bool isReg() const override { return false; } +  bool isBrList() const { return Kind == BrList; }    unsigned getReg() const override { -    assert(isReg()); -    // This is called from the tablegen matcher (MatchInstructionImpl) -    // where it expects to match the type of register, see RegOp above. -    return MVTToWasmReg(Reg.Type); +    llvm_unreachable("Assembly inspects a register operand"); +    return 0;    }    StringRef getToken() const { @@ -128,19 +107,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {    SMLoc getStartLoc() const override { return StartLoc; }    SMLoc getEndLoc() const override { return EndLoc; } -  void addRegOperands(MCInst &Inst, unsigned N) const { -    assert(N == 1 && "Invalid number of operands!"); -    assert(isReg() && "Not a register operand!"); -    // This is called from the tablegen matcher (MatchInstructionImpl) -    // where it expects to output the actual register index, see RegOp above. -    unsigned R = Reg.RegNo; -    if (Kind == Stack) { -      // A stack register is represented as a large negative number. -      // See WebAssemblyRegNumbering::runOnMachineFunction and -      // getWARegStackId for why this | is needed. 
-      R |= INT32_MIN; -    } -    Inst.addOperand(MCOperand::createReg(R)); +  void addRegOperands(MCInst &, unsigned) const { +    // Required by the assembly matcher. +    llvm_unreachable("Assembly matcher creates register operands");    }    void addImmOperands(MCInst &Inst, unsigned N) const { @@ -155,17 +124,17 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {        llvm_unreachable("Should be immediate or symbol!");    } +  void addBrListOperands(MCInst &Inst, unsigned N) const { +    assert(N == 1 && isBrList() && "Invalid BrList!"); +    for (auto Br : BrL.List) +      Inst.addOperand(MCOperand::createImm(Br)); +  } +    void print(raw_ostream &OS) const override {      switch (Kind) {      case Token:        OS << "Tok:" << Tok.Tok;        break; -    case Local: -      OS << "Loc:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type); -      break; -    case Stack: -      OS << "Stk:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type); -      break;      case Integer:        OS << "Int:" << Int.Val;        break; @@ -175,6 +144,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {      case Symbol:        OS << "Sym:" << Sym.Exp;        break; +    case BrList: +      OS << "BrList:" << BrL.List.size(); +      break;      }    }  }; @@ -182,352 +154,526 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {  class WebAssemblyAsmParser final : public MCTargetAsmParser {    MCAsmParser &Parser;    MCAsmLexer &Lexer; -  // These are for the current function being parsed: -  // These are vectors since register assignments are so far non-sparse. -  // Replace by map if necessary. -  std::vector<MVT::SimpleValueType> LocalTypes; -  std::vector<MVT::SimpleValueType> StackTypes; -  MCSymbol *LastLabel; + +  // Much like WebAssemblyAsmPrinter in the backend, we have to own these. 
+  std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures; + +  // Order of labels, directives and instructions in a .s file have no +  // syntactical enforcement. This class is a callback from the actual parser, +  // and yet we have to be feeding data to the streamer in a very particular +  // order to ensure a correct binary encoding that matches the regular backend +  // (the streamer does not enforce this). This "state machine" enum helps +  // guarantee that correct order. +  enum ParserState { +    FileStart, +    Label, +    FunctionStart, +    FunctionLocals, +    Instructions, +  } CurrentState = FileStart; + +  // For ensuring blocks are properly nested. +  enum NestingType { +    Function, +    Block, +    Loop, +    Try, +    If, +    Else, +    Undefined, +  }; +  std::vector<NestingType> NestingStack; + +  // We track this to see if a .functype following a label is the same, +  // as this is how we recognize the start of a function. +  MCSymbol *LastLabel = nullptr;  public: -  WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, -                       const MCInstrInfo &mii, const MCTargetOptions &Options) -      : MCTargetAsmParser(Options, sti, mii), Parser(Parser), -        Lexer(Parser.getLexer()), LastLabel(nullptr) { +  WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, +                       const MCInstrInfo &MII, const MCTargetOptions &Options) +      : MCTargetAsmParser(Options, STI, MII), Parser(Parser), +        Lexer(Parser.getLexer()) { +    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));    }  #define GET_ASSEMBLER_HEADER  #include "WebAssemblyGenAsmMatcher.inc"    // TODO: This is required to be implemented, but appears unused. 
-  bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/, -                     SMLoc &/*EndLoc*/) override { +  bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/, +                     SMLoc & /*EndLoc*/) override {      llvm_unreachable("ParseRegister is not implemented.");    } -  bool Error(const StringRef &msg, const AsmToken &tok) { -    return Parser.Error(tok.getLoc(), msg + tok.getString()); +  bool error(const Twine &Msg, const AsmToken &Tok) { +    return Parser.Error(Tok.getLoc(), Msg + Tok.getString()); +  } + +  bool error(const Twine &Msg) { +    return Parser.Error(Lexer.getTok().getLoc(), Msg); +  } + +  void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) { +    Signatures.push_back(std::move(Sig)); +  } + +  std::pair<StringRef, StringRef> nestingString(NestingType NT) { +    switch (NT) { +    case Function: +      return {"function", "end_function"}; +    case Block: +      return {"block", "end_block"}; +    case Loop: +      return {"loop", "end_loop"}; +    case Try: +      return {"try", "end_try"}; +    case If: +      return {"if", "end_if"}; +    case Else: +      return {"else", "end_if"}; +    default: +      llvm_unreachable("unknown NestingType"); +    } +  } + +  void push(NestingType NT) { NestingStack.push_back(NT); } + +  bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) { +    if (NestingStack.empty()) +      return error(Twine("End of block construct with no start: ") + Ins); +    auto Top = NestingStack.back(); +    if (Top != NT1 && Top != NT2) +      return error(Twine("Block construct type mismatch, expected: ") + +                   nestingString(Top).second + ", instead got: " + Ins); +    NestingStack.pop_back(); +    return false; +  } + +  bool ensureEmptyNestingStack() { +    auto err = !NestingStack.empty(); +    while (!NestingStack.empty()) { +      error(Twine("Unmatched block construct(s) at function end: ") + +            nestingString(NestingStack.back()).first); + 
     NestingStack.pop_back(); +    } +    return err;    } -  bool IsNext(AsmToken::TokenKind Kind) { -    auto ok = Lexer.is(Kind); -    if (ok) Parser.Lex(); -    return ok; +  bool isNext(AsmToken::TokenKind Kind) { +    auto Ok = Lexer.is(Kind); +    if (Ok) +      Parser.Lex(); +    return Ok;    } -  bool Expect(AsmToken::TokenKind Kind, const char *KindName) { -    if (!IsNext(Kind)) -      return Error(std::string("Expected ") + KindName + ", instead got: ", +  bool expect(AsmToken::TokenKind Kind, const char *KindName) { +    if (!isNext(Kind)) +      return error(std::string("Expected ") + KindName + ", instead got: ",                     Lexer.getTok());      return false;    } -  MVT::SimpleValueType ParseRegType(const StringRef &RegType) { -    // Derive type from .param .local decls, or the instruction itself. -    return StringSwitch<MVT::SimpleValueType>(RegType) -        .Case("i32", MVT::i32) -        .Case("i64", MVT::i64) -        .Case("f32", MVT::f32) -        .Case("f64", MVT::f64) -        .Case("i8x16", MVT::v16i8) -        .Case("i16x8", MVT::v8i16) -        .Case("i32x4", MVT::v4i32) -        .Case("f32x4", MVT::v4f32) -        .Default(MVT::INVALID_SIMPLE_VALUE_TYPE); +  StringRef expectIdent() { +    if (!Lexer.is(AsmToken::Identifier)) { +      error("Expected identifier, got: ", Lexer.getTok()); +      return StringRef(); +    } +    auto Name = Lexer.getTok().getString(); +    Parser.Lex(); +    return Name;    } -  MVT::SimpleValueType &GetType( -      std::vector<MVT::SimpleValueType> &Types, size_t i) { -    Types.resize(std::max(i + 1, Types.size()), MVT::INVALID_SIMPLE_VALUE_TYPE); -    return Types[i]; +  Optional<wasm::ValType> parseType(const StringRef &Type) { +    // FIXME: can't use StringSwitch because wasm::ValType doesn't have a +    // "invalid" value. 
+    if (Type == "i32") +      return wasm::ValType::I32; +    if (Type == "i64") +      return wasm::ValType::I64; +    if (Type == "f32") +      return wasm::ValType::F32; +    if (Type == "f64") +      return wasm::ValType::F64; +    if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || +        Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || +        Type == "f64x2") +      return wasm::ValType::V128; +    return Optional<wasm::ValType>();    } -  bool ParseReg(OperandVector &Operands, StringRef TypePrefix) { -    if (Lexer.is(AsmToken::Integer)) { -      auto &Local = Lexer.getTok(); -      // This is a reference to a local, turn it into a virtual register. -      auto LocalNo = static_cast<unsigned>(Local.getIntVal()); -      Operands.push_back(make_unique<WebAssemblyOperand>( -                           WebAssemblyOperand::Local, Local.getLoc(), -                           Local.getEndLoc(), -                           WebAssemblyOperand::RegOp{LocalNo, -                               GetType(LocalTypes, LocalNo)})); -      Parser.Lex(); -    } else if (Lexer.is(AsmToken::Identifier)) { -      auto &StackRegTok = Lexer.getTok(); -      // These are push/pop/drop pseudo stack registers, which we turn -      // into virtual registers also. The stackify pass will later turn them -      // back into implicit stack references if possible. -      auto StackReg = StackRegTok.getString(); -      auto StackOp = StackReg.take_while([](char c) { return isalpha(c); }); -      auto Reg = StackReg.drop_front(StackOp.size()); -      unsigned long long ParsedRegNo = 0; -      if (!Reg.empty() && getAsUnsignedInteger(Reg, 10, ParsedRegNo)) -        return Error("Cannot parse stack register index: ", StackRegTok); -      unsigned RegNo = static_cast<unsigned>(ParsedRegNo); -      if (StackOp == "push") { -        // This defines a result, record register type. 
-        auto RegType = ParseRegType(TypePrefix); -        GetType(StackTypes, RegNo) = RegType; -        Operands.push_back(make_unique<WebAssemblyOperand>( -                             WebAssemblyOperand::Stack, -                             StackRegTok.getLoc(), -                             StackRegTok.getEndLoc(), -                             WebAssemblyOperand::RegOp{RegNo, RegType})); -      } else if (StackOp == "pop") { -        // This uses a previously defined stack value. -        auto RegType = GetType(StackTypes, RegNo); -        Operands.push_back(make_unique<WebAssemblyOperand>( -                             WebAssemblyOperand::Stack, -                             StackRegTok.getLoc(), -                             StackRegTok.getEndLoc(), -                             WebAssemblyOperand::RegOp{RegNo, RegType})); -      } else if (StackOp == "drop") { -        // This operand will be dropped, since it is part of an instruction -        // whose result is void. -      } else { -        return Error("Unknown stack register prefix: ", StackRegTok); -      } +  WebAssembly::ExprType parseBlockType(StringRef ID) { +    return StringSwitch<WebAssembly::ExprType>(ID) +        .Case("i32", WebAssembly::ExprType::I32) +        .Case("i64", WebAssembly::ExprType::I64) +        .Case("f32", WebAssembly::ExprType::F32) +        .Case("f64", WebAssembly::ExprType::F64) +        .Case("v128", WebAssembly::ExprType::V128) +        .Case("except_ref", WebAssembly::ExprType::ExceptRef) +        .Case("void", WebAssembly::ExprType::Void) +        .Default(WebAssembly::ExprType::Invalid); +  } + +  bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) { +    while (Lexer.is(AsmToken::Identifier)) { +      auto Type = parseType(Lexer.getTok().getString()); +      if (!Type) +        return true; +      Types.push_back(Type.getValue());        Parser.Lex(); -    } else { -      return Error( -            "Expected identifier/integer following $, instead got: ", 
-            Lexer.getTok()); +      if (!isNext(AsmToken::Comma)) +        break;      } -    IsNext(AsmToken::Equal);      return false;    } -  void ParseSingleInteger(bool IsNegative, OperandVector &Operands) { +  void parseSingleInteger(bool IsNegative, OperandVector &Operands) {      auto &Int = Lexer.getTok();      int64_t Val = Int.getIntVal(); -    if (IsNegative) Val = -Val; +    if (IsNegative) +      Val = -Val;      Operands.push_back(make_unique<WebAssemblyOperand>( -                         WebAssemblyOperand::Integer, Int.getLoc(), -                         Int.getEndLoc(), WebAssemblyOperand::IntOp{Val})); +        WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), +        WebAssemblyOperand::IntOp{Val}));      Parser.Lex();    } -  bool ParseOperandStartingWithInteger(bool IsNegative, -                                       OperandVector &Operands, -                                       StringRef InstType) { -    ParseSingleInteger(IsNegative, Operands); -    if (Lexer.is(AsmToken::LParen)) { -      // Parse load/store operands of the form: offset($reg)align -      auto &LParen = Lexer.getTok(); -      Operands.push_back( -            make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, -                                            LParen.getLoc(), -                                            LParen.getEndLoc(), -                                            WebAssemblyOperand::TokOp{ -                                              LParen.getString()})); -      Parser.Lex(); -      if (Expect(AsmToken::Dollar, "register")) return true; -      if (ParseReg(Operands, InstType)) return true; -      auto &RParen = Lexer.getTok(); -      Operands.push_back( -            make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, -                                            RParen.getLoc(), -                                            RParen.getEndLoc(), -                                            WebAssemblyOperand::TokOp{ -                
                              RParen.getString()})); -      if (Expect(AsmToken::RParen, ")")) return true; -      if (Lexer.is(AsmToken::Integer)) { -        ParseSingleInteger(false, Operands); +  bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands, +                                       StringRef InstName) { +    parseSingleInteger(IsNegative, Operands); +    // FIXME: there is probably a cleaner way to do this. +    auto IsLoadStore = InstName.startswith("load") || +                       InstName.startswith("store") || +                       InstName.startswith("atomic_load") || +                       InstName.startswith("atomic_store"); +    if (IsLoadStore) { +      // Parse load/store operands of the form: offset align +      auto &Offset = Lexer.getTok(); +      if (Offset.is(AsmToken::Integer)) { +        parseSingleInteger(false, Operands);        } else {          // Alignment not specified.          // FIXME: correctly derive a default from the instruction. +        // We can't just call WebAssembly::GetDefaultP2Align since we don't have +        // an opcode until after the assembly matcher.          
Operands.push_back(make_unique<WebAssemblyOperand>( -                             WebAssemblyOperand::Integer, RParen.getLoc(), -                             RParen.getEndLoc(), WebAssemblyOperand::IntOp{0})); +            WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(), +            WebAssemblyOperand::IntOp{0}));        }      }      return false;    } -  bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name, +  void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc, +                           WebAssembly::ExprType BT) { +    Operands.push_back(make_unique<WebAssemblyOperand>( +        WebAssemblyOperand::Integer, NameLoc, NameLoc, +        WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)})); +  } + +  bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,                          SMLoc NameLoc, OperandVector &Operands) override { -    Operands.push_back( -          make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc, -                                          SMLoc::getFromPointer( -                                            NameLoc.getPointer() + Name.size()), -                                          WebAssemblyOperand::TokOp{ -                                            StringRef(NameLoc.getPointer(), -                                                    Name.size())})); +    // Note: Name does NOT point into the sourcecode, but to a local, so +    // use NameLoc instead. 
+    Name = StringRef(NameLoc.getPointer(), Name.size()); + +    // WebAssembly has instructions with / in them, which AsmLexer parses +    // as seperate tokens, so if we find such tokens immediately adjacent (no +    // whitespace), expand the name to include them: +    for (;;) { +      auto &Sep = Lexer.getTok(); +      if (Sep.getLoc().getPointer() != Name.end() || +          Sep.getKind() != AsmToken::Slash) +        break; +      // Extend name with / +      Name = StringRef(Name.begin(), Name.size() + Sep.getString().size()); +      Parser.Lex(); +      // We must now find another identifier, or error. +      auto &Id = Lexer.getTok(); +      if (Id.getKind() != AsmToken::Identifier || +          Id.getLoc().getPointer() != Name.end()) +        return error("Incomplete instruction name: ", Id); +      Name = StringRef(Name.begin(), Name.size() + Id.getString().size()); +      Parser.Lex(); +    } + +    // Now construct the name as first operand. +    Operands.push_back(make_unique<WebAssemblyOperand>( +        WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), +        WebAssemblyOperand::TokOp{Name}));      auto NamePair = Name.split('.');      // If no '.', there is no type prefix. -    if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second); +    auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second; + +    // If this instruction is part of a control flow structure, ensure +    // proper nesting. 
+    bool ExpectBlockType = false; +    if (BaseName == "block") { +      push(Block); +      ExpectBlockType = true; +    } else if (BaseName == "loop") { +      push(Loop); +      ExpectBlockType = true; +    } else if (BaseName == "try") { +      push(Try); +      ExpectBlockType = true; +    } else if (BaseName == "if") { +      push(If); +      ExpectBlockType = true; +    } else if (BaseName == "else") { +      if (pop(BaseName, If)) +        return true; +      push(Else); +    } else if (BaseName == "catch") { +      if (pop(BaseName, Try)) +        return true; +      push(Try); +    } else if (BaseName == "catch_all") { +      if (pop(BaseName, Try)) +        return true; +      push(Try); +    } else if (BaseName == "end_if") { +      if (pop(BaseName, If, Else)) +        return true; +    } else if (BaseName == "end_try") { +      if (pop(BaseName, Try)) +        return true; +    } else if (BaseName == "end_loop") { +      if (pop(BaseName, Loop)) +        return true; +    } else if (BaseName == "end_block") { +      if (pop(BaseName, Block)) +        return true; +    } else if (BaseName == "end_function") { +      if (pop(BaseName, Function) || ensureEmptyNestingStack()) +        return true; +    } +      while (Lexer.isNot(AsmToken::EndOfStatement)) {        auto &Tok = Lexer.getTok();        switch (Tok.getKind()) { -      case AsmToken::Dollar: { -        Parser.Lex(); -        if (ParseReg(Operands, NamePair.first)) return true; -        break; -      }        case AsmToken::Identifier: {          auto &Id = Lexer.getTok(); -        const MCExpr *Val; -        SMLoc End; -        if (Parser.parsePrimaryExpr(Val, End)) -          return Error("Cannot parse symbol: ", Lexer.getTok()); -        Operands.push_back(make_unique<WebAssemblyOperand>( -                             WebAssemblyOperand::Symbol, Id.getLoc(), -                             Id.getEndLoc(), WebAssemblyOperand::SymOp{Val})); +        if (ExpectBlockType) { +          // Assume 
this identifier is a block_type. +          auto BT = parseBlockType(Id.getString()); +          if (BT == WebAssembly::ExprType::Invalid) +            return error("Unknown block type: ", Id); +          addBlockTypeOperand(Operands, NameLoc, BT); +          Parser.Lex(); +        } else { +          // Assume this identifier is a label. +          const MCExpr *Val; +          SMLoc End; +          if (Parser.parsePrimaryExpr(Val, End)) +            return error("Cannot parse symbol: ", Lexer.getTok()); +          Operands.push_back(make_unique<WebAssemblyOperand>( +              WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), +              WebAssemblyOperand::SymOp{Val})); +        }          break;        }        case AsmToken::Minus:          Parser.Lex();          if (Lexer.isNot(AsmToken::Integer)) -          return Error("Expected integer instead got: ", Lexer.getTok()); -        if (ParseOperandStartingWithInteger(true, Operands, NamePair.first)) +          return error("Expected integer instead got: ", Lexer.getTok()); +        if (parseOperandStartingWithInteger(true, Operands, BaseName))            return true;          break;        case AsmToken::Integer: -        if (ParseOperandStartingWithInteger(false, Operands, NamePair.first)) +        if (parseOperandStartingWithInteger(false, Operands, BaseName))            return true;          break;        case AsmToken::Real: {          double Val;          if (Tok.getString().getAsDouble(Val, false)) -          return Error("Cannot parse real: ", Tok); +          return error("Cannot parse real: ", Tok);          Operands.push_back(make_unique<WebAssemblyOperand>( -                             WebAssemblyOperand::Float, Tok.getLoc(), -                             Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val})); +            WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(), +            WebAssemblyOperand::FltOp{Val})); +        Parser.Lex(); +        break; +      } +      case 
AsmToken::LCurly: {          Parser.Lex(); +        auto Op = make_unique<WebAssemblyOperand>( +            WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc()); +        if (!Lexer.is(AsmToken::RCurly)) +          for (;;) { +            Op->BrL.List.push_back(Lexer.getTok().getIntVal()); +            expect(AsmToken::Integer, "integer"); +            if (!isNext(AsmToken::Comma)) +              break; +          } +        expect(AsmToken::RCurly, "}"); +        Operands.push_back(std::move(Op));          break;        }        default: -        return Error("Unexpected token in operand: ", Tok); +        return error("Unexpected token in operand: ", Tok);        }        if (Lexer.isNot(AsmToken::EndOfStatement)) { -        if (Expect(AsmToken::Comma, ",")) return true; -      } -    } -    Parser.Lex(); -    // Call instructions are vararg, but the tablegen matcher doesn't seem to -    // support that, so for now we strip these extra operands. -    // This is problematic if these arguments are not simple $pop stack -    // registers, since e.g. a local register would get lost, so we check for -    // this. This can be the case when using -disable-wasm-explicit-locals -    // which currently s2wasm requires. -    // TODO: Instead, we can move this code to MatchAndEmitInstruction below and -    // actually generate get_local instructions on the fly. -    // Or even better, improve the matcher to support vararg? -    auto IsIndirect = NamePair.second == "call_indirect"; -    if (IsIndirect || NamePair.second == "call") { -      // Figure out number of fixed operands from the instruction. -      size_t CallOperands = 1;  // The name token. -      if (!IsIndirect) CallOperands++;  // The function index. -      if (!NamePair.first.empty()) CallOperands++;  // The result register. -      if (Operands.size() > CallOperands) { -        // Ensure operands we drop are all $pop. 
-        for (size_t I = CallOperands; I < Operands.size(); I++) { -          auto Operand = -              reinterpret_cast<WebAssemblyOperand *>(Operands[I].get()); -          if (Operand->Kind != WebAssemblyOperand::Stack) -            Parser.Error(NameLoc, -              "Call instruction has non-stack arguments, if this code was " -              "generated with -disable-wasm-explicit-locals please remove it"); -        } -        // Drop unneeded operands. -        Operands.resize(CallOperands); +        if (expect(AsmToken::Comma, ",")) +          return true;        }      } -    // Block instructions require a signature index, but these are missing in -    // assembly, so we add a dummy one explicitly (since we have no control -    // over signature tables here, we assume these will be regenerated when -    // the wasm module is generated). -    if (NamePair.second == "block" || NamePair.second == "loop") { -      Operands.push_back(make_unique<WebAssemblyOperand>( -                           WebAssemblyOperand::Integer, NameLoc, -                           NameLoc, WebAssemblyOperand::IntOp{-1})); -    } -    // These don't specify the type, which has to derived from the local index. -    if (NamePair.second == "get_local" || NamePair.second == "tee_local") { -      if (Operands.size() >= 3 && Operands[1]->isReg() && -          Operands[2]->isImm()) { -        auto Op1 = reinterpret_cast<WebAssemblyOperand *>(Operands[1].get()); -        auto Op2 = reinterpret_cast<WebAssemblyOperand *>(Operands[2].get()); -        auto Type = GetType(LocalTypes, static_cast<size_t>(Op2->Int.Val)); -        Op1->Reg.Type = Type; -        GetType(StackTypes, Op1->Reg.RegNo) = Type; -      } +    if (ExpectBlockType && Operands.size() == 1) { +      // Support blocks with no operands as default to void. 
+      addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);      } +    Parser.Lex();      return false;    }    void onLabelParsed(MCSymbol *Symbol) override {      LastLabel = Symbol; +    CurrentState = Label;    } +  bool parseSignature(wasm::WasmSignature *Signature) { +    if (expect(AsmToken::LParen, "(")) +      return true; +    if (parseRegTypeList(Signature->Params)) +      return true; +    if (expect(AsmToken::RParen, ")")) +      return true; +    if (expect(AsmToken::MinusGreater, "->")) +      return true; +    if (expect(AsmToken::LParen, "(")) +      return true; +    if (parseRegTypeList(Signature->Returns)) +      return true; +    if (expect(AsmToken::RParen, ")")) +      return true; +    return false; +  } + +  // This function processes wasm-specific directives streamed to +  // WebAssemblyTargetStreamer, all others go to the generic parser +  // (see WasmAsmParser).    bool ParseDirective(AsmToken DirectiveID) override { +    // This function has a really weird return value behavior that is different +    // from all the other parsing functions: +    // - return true && no tokens consumed -> don't know this directive / let +    //   the generic parser handle it. +    // - return true && tokens consumed -> a parsing error occurred. +    // - return false -> processed this directive successfully.      assert(DirectiveID.getKind() == AsmToken::Identifier);      auto &Out = getStreamer(); -    auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>( -                   *Out.getTargetStreamer()); -    // TODO: we're just parsing the subset of directives we're interested in, -    // and ignoring ones we don't recognise. We should ideally verify -    // all directives here. 
-    if (DirectiveID.getString() == ".type") { -      // This could be the start of a function, check if followed by -      // "label,@function" -      if (!(IsNext(AsmToken::Identifier) && -            IsNext(AsmToken::Comma) && -            IsNext(AsmToken::At) && -            Lexer.is(AsmToken::Identifier))) -        return Error("Expected label,@type declaration, got: ", Lexer.getTok()); -      if (Lexer.getTok().getString() == "function") { -        // Track locals from start of function. -        LocalTypes.clear(); -        StackTypes.clear(); -      } -      Parser.Lex(); -      //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction); -    } else if (DirectiveID.getString() == ".param" || -               DirectiveID.getString() == ".local") { -      // Track the number of locals, needed for correct virtual register -      // assignment elsewhere. -      // Also output a directive to the streamer. -      std::vector<MVT> Params; -      std::vector<MVT> Locals; -      while (Lexer.is(AsmToken::Identifier)) { -        auto RegType = ParseRegType(Lexer.getTok().getString()); -        if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true; -        LocalTypes.push_back(RegType); -        if (DirectiveID.getString() == ".param") { -          Params.push_back(RegType); -        } else { -          Locals.push_back(RegType); -        } -        Parser.Lex(); -        if (!IsNext(AsmToken::Comma)) break; +    auto &TOut = +        reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer()); + +    // TODO: any time we return an error, at least one token must have been +    // consumed, otherwise this will not signal an error to the caller. 
+    if (DirectiveID.getString() == ".globaltype") { +      auto SymName = expectIdent(); +      if (SymName.empty()) +        return true; +      if (expect(AsmToken::Comma, ",")) +        return true; +      auto TypeTok = Lexer.getTok(); +      auto TypeName = expectIdent(); +      if (TypeName.empty()) +        return true; +      auto Type = parseType(TypeName); +      if (!Type) +        return error("Unknown type in .globaltype directive: ", TypeTok); +      // Now set this symbol with the correct type. +      auto WasmSym = cast<MCSymbolWasm>( +          TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); +      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); +      WasmSym->setGlobalType( +          wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); +      // And emit the directive again. +      TOut.emitGlobalType(WasmSym); +      return expect(AsmToken::EndOfStatement, "EOL"); +    } + +    if (DirectiveID.getString() == ".functype") { +      // This code has to send things to the streamer similar to +      // WebAssemblyAsmPrinter::EmitFunctionBodyStart. +      // TODO: would be good to factor this into a common function, but the +      // assembler and backend really don't share any common code, and this code +      // parses the locals separately. +      auto SymName = expectIdent(); +      if (SymName.empty()) +        return true; +      auto WasmSym = cast<MCSymbolWasm>( +          TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); +      if (CurrentState == Label && WasmSym == LastLabel) { +        // This .functype indicates a start of a function. 
+        if (ensureEmptyNestingStack()) +          return true; +        CurrentState = FunctionStart; +        push(Function);        } -      assert(LastLabel); -      TOut.emitParam(LastLabel, Params); +      auto Signature = make_unique<wasm::WasmSignature>(); +      if (parseSignature(Signature.get())) +        return true; +      WasmSym->setSignature(Signature.get()); +      addSignature(std::move(Signature)); +      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); +      TOut.emitFunctionType(WasmSym); +      // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. +      return expect(AsmToken::EndOfStatement, "EOL"); +    } + +    if (DirectiveID.getString() == ".eventtype") { +      auto SymName = expectIdent(); +      if (SymName.empty()) +        return true; +      auto WasmSym = cast<MCSymbolWasm>( +          TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); +      auto Signature = make_unique<wasm::WasmSignature>(); +      if (parseRegTypeList(Signature->Params)) +        return true; +      WasmSym->setSignature(Signature.get()); +      addSignature(std::move(Signature)); +      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT); +      TOut.emitEventType(WasmSym); +      // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 
+      return expect(AsmToken::EndOfStatement, "EOL"); +    } + +    if (DirectiveID.getString() == ".local") { +      if (CurrentState != FunctionStart) +        return error(".local directive should follow the start of a function", +                     Lexer.getTok()); +      SmallVector<wasm::ValType, 4> Locals; +      if (parseRegTypeList(Locals)) +        return true;        TOut.emitLocal(Locals); -    } else { -      // For now, ignore anydirective we don't recognize: -      while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex(); +      CurrentState = FunctionLocals; +      return expect(AsmToken::EndOfStatement, "EOL");      } -    return Expect(AsmToken::EndOfStatement, "EOL"); + +    return true; // We didn't process this directive.    } -  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/, -                               OperandVector &Operands, -                               MCStreamer &Out, uint64_t &ErrorInfo, +  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, +                               OperandVector &Operands, MCStreamer &Out, +                               uint64_t &ErrorInfo,                                 bool MatchingInlineAsm) override {      MCInst Inst;      unsigned MatchResult =          MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);      switch (MatchResult) {      case Match_Success: { +      if (CurrentState == FunctionStart) { +        // This is the first instruction in a function, but we haven't seen +        // a .local directive yet. The streamer requires locals to be encoded +        // as a prelude to the instructions, so emit an empty list of locals +        // here. 
+        auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>( +            *Out.getTargetStreamer()); +        TOut.emitLocal(SmallVector<wasm::ValType, 0>()); +      } +      CurrentState = Instructions;        Out.EmitInstruction(Inst, getSTI());        return false;      }      case Match_MissingFeature: -      return Parser.Error(IDLoc, -          "instruction requires a WASM feature not currently enabled"); +      return Parser.Error( +          IDLoc, "instruction requires a WASM feature not currently enabled");      case Match_MnemonicFail:        return Parser.Error(IDLoc, "invalid instruction");      case Match_NearMisses: @@ -547,6 +693,8 @@ public:      }      llvm_unreachable("Implement any new match types added!");    } + +  void onEndOfFile() override { ensureEmptyNestingStack(); }  };  } // end anonymous namespace  | 
