diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/WebAssembly')
50 files changed, 2599 insertions, 1759 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index e29d85d7588d..60ac3248b9e7 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -35,10 +35,12 @@ using namespace llvm; #define DEBUG_TYPE "wasm-asm-parser" +static const char *getSubtargetFeatureName(uint64_t Val); + namespace { /// WebAssemblyOperand - Instances of this class represent the operands in a -/// parsed WASM machine instruction. +/// parsed Wasm machine instruction. struct WebAssemblyOperand : public MCParsedAsmOperand { enum KindTy { Token, Integer, Float, Symbol, BrList } Kind; @@ -158,6 +160,24 @@ struct WebAssemblyOperand : public MCParsedAsmOperand { } }; +static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx, + const StringRef &Name) { + // FIXME: Duplicates functionality from + // MC/WasmObjectWriter::recordRelocation, as well as WebAssemblyCodegen's + // WebAssembly:getOrCreateFunctionTableSymbol. + MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name)); + if (Sym) { + if (!Sym->isFunctionTable()) + Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table"); + } else { + Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name)); + Sym->setFunctionTable(); + // The default function table is synthesized by the linker. 
+ Sym->setUndefined(); + } + return Sym; +} + class WebAssemblyAsmParser final : public MCTargetAsmParser { MCAsmParser &Parser; MCAsmLexer &Lexer; @@ -320,8 +340,8 @@ public: Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || Type == "f64x2") return wasm::ValType::V128; - if (Type == "exnref") - return wasm::ValType::EXNREF; + if (Type == "funcref") + return wasm::ValType::FUNCREF; if (Type == "externref") return wasm::ValType::EXTERNREF; return Optional<wasm::ValType>(); @@ -335,7 +355,8 @@ public: .Case("f32", WebAssembly::BlockType::F32) .Case("f64", WebAssembly::BlockType::F64) .Case("v128", WebAssembly::BlockType::V128) - .Case("exnref", WebAssembly::BlockType::Exnref) + .Case("funcref", WebAssembly::BlockType::Funcref) + .Case("externref", WebAssembly::BlockType::Externref) .Case("void", WebAssembly::BlockType::Void) .Default(WebAssembly::BlockType::Invalid); } @@ -403,7 +424,8 @@ public: bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) { // FIXME: there is probably a cleaner way to do this. auto IsLoadStore = InstName.find(".load") != StringRef::npos || - InstName.find(".store") != StringRef::npos; + InstName.find(".store") != StringRef::npos || + InstName.find("prefetch") != StringRef::npos; auto IsAtomic = InstName.find("atomic.") != StringRef::npos; if (IsLoadStore || IsAtomic) { // Parse load/store operands of the form: offset:p2align=align @@ -417,6 +439,12 @@ public: return error("Expected integer constant"); parseSingleInteger(false, Operands); } else { + // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane + // index. We need to avoid parsing an extra alignment operand for the + // lane index. + auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos; + if (IsLoadStoreLane && Operands.size() == 4) + return false; // Alignment not specified (or atomics, must use default alignment). 
// We can't just call WebAssembly::GetDefaultP2Align since we don't have // an opcode until after the assembly matcher, so set a default to fix @@ -430,6 +458,13 @@ public: return false; } + WebAssembly::HeapType parseHeapType(StringRef Id) { + return StringSwitch<WebAssembly::HeapType>(Id) + .Case("extern", WebAssembly::HeapType::Externref) + .Case("func", WebAssembly::HeapType::Funcref) + .Default(WebAssembly::HeapType::Invalid); + } + void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc, WebAssembly::BlockType BT) { Operands.push_back(std::make_unique<WebAssemblyOperand>( @@ -472,6 +507,7 @@ public: // proper nesting. bool ExpectBlockType = false; bool ExpectFuncType = false; + bool ExpectHeapType = false; if (Name == "block") { push(Block); ExpectBlockType = true; @@ -511,6 +547,17 @@ public: return true; } else if (Name == "call_indirect" || Name == "return_call_indirect") { ExpectFuncType = true; + // Ensure that the object file has a __indirect_function_table import, as + // we call_indirect against it. + auto &Ctx = getStreamer().getContext(); + MCSymbolWasm *Sym = + GetOrCreateFunctionTableSymbol(Ctx, "__indirect_function_table"); + // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark + // it as NO_STRIP so as to ensure that the indirect function table makes + // it to linked output. 
+ Sym->setNoStrip(); + } else if (Name == "ref.null") { + ExpectHeapType = true; } if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) { @@ -552,6 +599,15 @@ public: return error("Unknown block type: ", Id); addBlockTypeOperand(Operands, NameLoc, BT); Parser.Lex(); + } else if (ExpectHeapType) { + auto HeapType = parseHeapType(Id.getString()); + if (HeapType == WebAssembly::HeapType::Invalid) { + return error("Expected a heap type: ", Id); + } + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Integer, Id.getLoc(), Id.getEndLoc(), + WebAssemblyOperand::IntOp{static_cast<int64_t>(HeapType)})); + Parser.Lex(); } else { // Assume this identifier is a label. const MCExpr *Val; @@ -687,16 +743,52 @@ public: auto Type = parseType(TypeName); if (!Type) return error("Unknown type in .globaltype directive: ", TypeTok); + // Optional mutable modifier. Default to mutable for historical reasons. + // Ideally we would have gone with immutable as the default and used `mut` + // as the modifier to match the `.wat` format. + bool Mutable = true; + if (isNext(AsmToken::Comma)) { + TypeTok = Lexer.getTok(); + auto Id = expectIdent(); + if (Id == "immutable") + Mutable = false; + else + // Should we also allow `mutable` and `mut` here for clarity? + return error("Unknown type in .globaltype modifier: ", TypeTok); + } // Now set this symbol with the correct type. auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType( - wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); + wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable}); // And emit the directive again. 
TOut.emitGlobalType(WasmSym); return expect(AsmToken::EndOfStatement, "EOL"); } + if (DirectiveID.getString() == ".tabletype") { + auto SymName = expectIdent(); + if (SymName.empty()) + return true; + if (expect(AsmToken::Comma, ",")) + return true; + auto TypeTok = Lexer.getTok(); + auto TypeName = expectIdent(); + if (TypeName.empty()) + return true; + auto Type = parseType(TypeName); + if (!Type) + return error("Unknown type in .tabletype directive: ", TypeTok); + + // Now that we have the name and table type, we can actually create the + // symbol + auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); + WasmSym->setTableType(Type.getValue()); + TOut.emitTableType(WasmSym); + return expect(AsmToken::EndOfStatement, "EOL"); + } + if (DirectiveID.getString() == ".functype") { // This code has to send things to the streamer similar to // WebAssemblyAsmPrinter::EmitFunctionBodyStart. @@ -836,8 +928,9 @@ public: bool MatchingInlineAsm) override { MCInst Inst; Inst.setLoc(IDLoc); - unsigned MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + FeatureBitset MissingFeatures; + unsigned MatchResult = MatchInstructionImpl( + Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm); switch (MatchResult) { case Match_Success: { ensureLocals(Out); @@ -866,9 +959,16 @@ public: } return false; } - case Match_MissingFeature: - return Parser.Error( - IDLoc, "instruction requires a WASM feature not currently enabled"); + case Match_MissingFeature: { + assert(MissingFeatures.count() > 0 && "Expected missing features"); + SmallString<128> Message; + raw_svector_ostream OS(Message); + OS << "instruction requires:"; + for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) + if (MissingFeatures.test(i)) + OS << ' ' << getSubtargetFeatureName(i); + return Parser.Error(IDLoc, Message); + } case Match_MnemonicFail: return Parser.Error(IDLoc, "invalid instruction"); case 
Match_NearMisses: @@ -896,12 +996,27 @@ public: auto SymName = Symbol->getName(); if (SymName.startswith(".L")) return; // Local Symbol. + // Only create a new text section if we're already in one. + // TODO: If the user explicitly creates a new function section, we ignore + // its name when we create this one. It would be nice to honor their + // choice, while still ensuring that we create one if they forget. + // (that requires coordination with WasmAsmParser::parseSectionDirective) auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first); if (!CWS || !CWS->getKind().isText()) return; auto SecName = ".text." + SymName; - auto WS = getContext().getWasmSection(SecName, SectionKind::getText()); + + auto *Group = CWS->getGroup(); + // If the current section is a COMDAT, also set the flag on the symbol. + // TODO: Currently the only place that the symbols' comdat flag matters is + // for importing comdat functions. But there's no way to specify that in + // assembly currently. + if (Group) + cast<MCSymbolWasm>(Symbol)->setComdat(true); + auto *WS = + getContext().getWasmSection(SecName, SectionKind::getText(), Group, + MCContext::GenericSectionID, nullptr); getStreamer().SwitchSection(WS); // Also generate DWARF for this section if requested. 
if (getContext().getGenDwarfForAssembly()) @@ -932,5 +1047,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() { } #define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION #include "WebAssemblyGenAsmMatcher.inc" diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index 42fa6d58fffd..1b7cc093f7ad 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -198,6 +198,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( case WebAssembly::OPERAND_LOCAL: case WebAssembly::OPERAND_GLOBAL: case WebAssembly::OPERAND_FUNCTION32: + case WebAssembly::OPERAND_TABLE: case WebAssembly::OPERAND_OFFSET32: case WebAssembly::OPERAND_OFFSET64: case WebAssembly::OPERAND_P2ALIGN: @@ -240,6 +241,28 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( } break; } + // heap_type operands, for e.g. ref.null: + case WebAssembly::OPERAND_HEAPTYPE: { + int64_t Val; + uint64_t PrevSize = Size; + if (!nextLEB(Val, Bytes, Size, true)) + return MCDisassembler::Fail; + if (Val < 0 && Size == PrevSize + 1) { + // The HeapType encoding is like BlockType, in that encodings that + // decode as negative values indicate ValTypes. In practice we expect + // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here. + // + // The positive SLEB values are reserved for future expansion and are + // expected to be type indices in the typed function references + // proposal, and should disassemble as MCSymbolRefExpr as in BlockType + // above. 
+ MI.addOperand(MCOperand::createImm(Val & 0x7f)); + } else { + MI.addOperand( + MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid))); + } + break; + } // FP operands. case WebAssembly::OPERAND_F32IMM: { if (!parseImmediate<float>(MI, Size, Bytes)) diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 8ecd7c53621d..d88311197c1a 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -59,11 +59,6 @@ public: return false; } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - return false; - } - bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index f60b5fcd14ec..fb8b0c364f30 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -94,19 +94,18 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, case WebAssembly::LOOP_S: printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':'); ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true)); - break; + return; case WebAssembly::BLOCK: case WebAssembly::BLOCK_S: ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); - break; + return; case WebAssembly::TRY: case WebAssembly::TRY_S: - ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); - EHPadStack.push_back(EHPadStackCounter++); - LastSeenEHInst = TRY; - break; + 
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter, false)); + EHPadStack.push_back(ControlFlowCounter++); + return; case WebAssembly::END_LOOP: case WebAssembly::END_LOOP_S: @@ -115,7 +114,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, } else { ControlFlowStack.pop_back(); } - break; + return; case WebAssembly::END_BLOCK: case WebAssembly::END_BLOCK_S: @@ -125,7 +124,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation( OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); } - break; + return; case WebAssembly::END_TRY: case WebAssembly::END_TRY_S: @@ -134,60 +133,60 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address, } else { printAnnotation( OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); - LastSeenEHInst = END_TRY; } - break; + return; case WebAssembly::CATCH: case WebAssembly::CATCH_S: + case WebAssembly::CATCH_ALL: + case WebAssembly::CATCH_ALL_S: if (EHPadStack.empty()) { printAnnotation(OS, "try-catch mismatch!"); } else { printAnnotation(OS, "catch" + utostr(EHPadStack.pop_back_val()) + ':'); } - break; - } - - // Annotate any control flow label references. + return; - // rethrow instruction does not take any depth argument and rethrows to the - // nearest enclosing catch scope, if any. If there's no enclosing catch - // scope, it throws up to the caller. - if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) { + case WebAssembly::RETHROW: + case WebAssembly::RETHROW_S: + // 'rethrow' rethrows to the nearest enclosing catch scope, if any. If + // there's no enclosing catch scope, it throws up to the caller. 
if (EHPadStack.empty()) { printAnnotation(OS, "to caller"); } else { printAnnotation(OS, "down to catch" + utostr(EHPadStack.back())); } + return; + } - } else { - unsigned NumFixedOperands = Desc.NumOperands; - SmallSet<uint64_t, 8> Printed; - for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) { - // See if this operand denotes a basic block target. - if (I < NumFixedOperands) { - // A non-variable_ops operand, check its type. - if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK) - continue; - } else { - // A variable_ops operand, which currently can be immediates (used in - // br_table) which are basic block targets, or for call instructions - // when using -wasm-keep-registers (in which case they are registers, - // and should not be processed). - if (!MI->getOperand(I).isImm()) - continue; - } - uint64_t Depth = MI->getOperand(I).getImm(); - if (!Printed.insert(Depth).second) + // Annotate any control flow label references. + + unsigned NumFixedOperands = Desc.NumOperands; + SmallSet<uint64_t, 8> Printed; + for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) { + // See if this operand denotes a basic block target. + if (I < NumFixedOperands) { + // A non-variable_ops operand, check its type. + if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK) continue; - if (Depth >= ControlFlowStack.size()) { - printAnnotation(OS, "Invalid depth argument!"); - } else { - const auto &Pair = ControlFlowStack.rbegin()[Depth]; - printAnnotation(OS, utostr(Depth) + ": " + - (Pair.second ? "up" : "down") + " to label" + - utostr(Pair.first)); - } + } else { + // A variable_ops operand, which currently can be immediates (used in + // br_table) which are basic block targets, or for call instructions + // when using -wasm-keep-registers (in which case they are registers, + // and should not be processed). 
+ if (!MI->getOperand(I).isImm()) + continue; + } + uint64_t Depth = MI->getOperand(I).getImm(); + if (!Printed.insert(Depth).second) + continue; + if (Depth >= ControlFlowStack.size()) { + printAnnotation(OS, "Invalid depth argument!"); + } else { + const auto &Pair = ControlFlowStack.rbegin()[Depth]; + printAnnotation(OS, utostr(Depth) + ": " + + (Pair.second ? "up" : "down") + " to label" + + utostr(Pair.first)); } } } @@ -302,6 +301,29 @@ void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, } } +void WebAssemblyInstPrinter::printWebAssemblyHeapTypeOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + switch (Op.getImm()) { + case long(wasm::ValType::EXTERNREF): + O << "extern"; + break; + case long(wasm::ValType::FUNCREF): + O << "func"; + break; + default: + O << "unsupported_heap_type_value"; + break; + } + } else { + // Typed function references and other subtypes of funcref and externref + // currently unimplemented. + O << "unsupported_heap_type_operand"; + } +} + // We have various enums representing a subset of these types, use this // function to convert any of them to text. 
const char *WebAssembly::anyTypeToString(unsigned Ty) { @@ -318,10 +340,10 @@ const char *WebAssembly::anyTypeToString(unsigned Ty) { return "v128"; case wasm::WASM_TYPE_FUNCREF: return "funcref"; + case wasm::WASM_TYPE_EXTERNREF: + return "externref"; case wasm::WASM_TYPE_FUNC: return "func"; - case wasm::WASM_TYPE_EXNREF: - return "exnref"; case wasm::WASM_TYPE_NORESULT: return "void"; default: diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h index 1387a1928b3f..2ed6d562acff 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -25,13 +25,9 @@ class MCSubtargetInfo; class WebAssemblyInstPrinter final : public MCInstPrinter { uint64_t ControlFlowCounter = 0; - uint64_t EHPadStackCounter = 0; SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack; SmallVector<uint64_t, 4> EHPadStack; - enum EHInstKind { TRY, CATCH, END_TRY }; - EHInstKind LastSeenEHInst = END_TRY; - public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); @@ -48,8 +44,11 @@ public: raw_ostream &O); void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWebAssemblyHeapTypeOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); // Autogenerated by tblgen. 
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); }; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index dfed3451e45b..55bf5d14fdac 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -62,12 +62,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( uint64_t Start = OS.tell(); uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - if (Binary <= UINT8_MAX) { + if (Binary < (1 << 8)) { OS << uint8_t(Binary); - } else { - assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet"); + } else if (Binary < (1 << 16)) { OS << uint8_t(Binary >> 8); encodeULEB128(uint8_t(Binary), OS); + } else if (Binary < (1 << 24)) { + OS << uint8_t(Binary >> 16); + encodeULEB128(uint16_t(Binary), OS); + } else { + llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported"); } // For br_table instructions, encode the size of the table. 
In the MCInst, @@ -102,6 +106,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( encodeSLEB128(int64_t(MO.getImm()), OS); break; case WebAssembly::OPERAND_SIGNATURE: + case WebAssembly::OPERAND_HEAPTYPE: OS << uint8_t(MO.getImm()); break; case WebAssembly::OPERAND_VEC_I8IMM: @@ -151,6 +156,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( PaddedSize = 10; break; case WebAssembly::OPERAND_FUNCTION32: + case WebAssembly::OPERAND_TABLE: case WebAssembly::OPERAND_OFFSET32: case WebAssembly::OPERAND_SIGNATURE: case WebAssembly::OPERAND_TYPEINDEX: diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 027e5408c633..064e613cfc8e 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -76,7 +76,7 @@ static MCAsmBackend *createAsmBackend(const Target & /*T*/, static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS); + return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } static MCTargetStreamer * @@ -147,8 +147,10 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) { case MVT::v4f32: case MVT::v2f64: return wasm::ValType::V128; - case MVT::exnref: - return wasm::ValType::EXNREF; + case MVT::funcref: + return wasm::ValType::FUNCREF; + case MVT::externref: + return wasm::ValType::EXTERNREF; default: llvm_unreachable("unexpected type"); } diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 02b310628ee1..5b77b8495adf 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -76,6 +76,10 @@ enum OperandType { OPERAND_EVENT, /// A list of branch targets for br_list. OPERAND_BRLIST, + /// 32-bit unsigned table number. + OPERAND_TABLE, + /// heap type immediate for ref.null. + OPERAND_HEAPTYPE, }; } // end namespace WebAssembly @@ -97,6 +101,11 @@ enum TOF { MO_MEMORY_BASE_REL, // On a symbol operand this indicates that the immediate is the symbol + // address relative the __tls_base wasm global. + // Only applicable to data symbols. + MO_TLS_BASE_REL, + + // On a symbol operand this indicates that the immediate is the symbol // address relative the __table_base wasm global. // Only applicable to function symbols. MO_TABLE_BASE_REL, @@ -129,7 +138,8 @@ enum class BlockType : unsigned { F32 = unsigned(wasm::ValType::F32), F64 = unsigned(wasm::ValType::F64), V128 = unsigned(wasm::ValType::V128), - Exnref = unsigned(wasm::ValType::EXNREF), + Externref = unsigned(wasm::ValType::EXTERNREF), + Funcref = unsigned(wasm::ValType::FUNCREF), // Multivalue blocks (and other non-void blocks) are only emitted when the // blocks will never be exited and are at the ends of functions (see // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made @@ -138,6 +148,13 @@ enum class BlockType : unsigned { Multivalue = 0xffff, }; +/// Used as immediate MachineOperands for heap types, e.g. for ref.null. +enum class HeapType : unsigned { + Invalid = 0x00, + Externref = unsigned(wasm::ValType::EXTERNREF), + Funcref = unsigned(wasm::ValType::FUNCREF), +}; + /// Instruction opcodes emitted via means other than CodeGen. 
static const unsigned Nop = 0x01; static const unsigned End = 0x0b; @@ -176,8 +193,12 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v8x16) - return 0; + WASM_LOAD_STORE(LOAD8_SPLAT) + WASM_LOAD_STORE(LOAD_LANE_I8x16) + WASM_LOAD_STORE(STORE_LANE_I8x16) + WASM_LOAD_STORE(PREFETCH_T) + WASM_LOAD_STORE(PREFETCH_NT) + return 0; WASM_LOAD_STORE(LOAD16_S_I32) WASM_LOAD_STORE(LOAD16_U_I32) WASM_LOAD_STORE(LOAD16_S_I64) @@ -202,8 +223,10 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v16x8) - return 1; + WASM_LOAD_STORE(LOAD16_SPLAT) + WASM_LOAD_STORE(LOAD_LANE_I16x8) + WASM_LOAD_STORE(STORE_LANE_I16x8) + return 1; WASM_LOAD_STORE(LOAD_I32) WASM_LOAD_STORE(LOAD_F32) WASM_LOAD_STORE(STORE_I32) @@ -229,10 +252,13 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32) WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64) - WASM_LOAD_STORE(ATOMIC_NOTIFY) - WASM_LOAD_STORE(ATOMIC_WAIT_I32) - WASM_LOAD_STORE(LOAD_SPLAT_v32x4) - return 2; + WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY) + WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32) + WASM_LOAD_STORE(LOAD32_SPLAT) + WASM_LOAD_STORE(LOAD_ZERO_I32x4) + WASM_LOAD_STORE(LOAD_LANE_I32x4) + WASM_LOAD_STORE(STORE_LANE_I32x4) + return 2; WASM_LOAD_STORE(LOAD_I64) WASM_LOAD_STORE(LOAD_F64) WASM_LOAD_STORE(STORE_I64) @@ -246,15 +272,18 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64) WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64) WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64) - WASM_LOAD_STORE(ATOMIC_WAIT_I64) - WASM_LOAD_STORE(LOAD_SPLAT_v64x2) - WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16) - WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16) - 
WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32) - WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32) - WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64) - WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64) - return 3; + WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT64) + WASM_LOAD_STORE(LOAD64_SPLAT) + WASM_LOAD_STORE(LOAD_EXTEND_S_I16x8) + WASM_LOAD_STORE(LOAD_EXTEND_U_I16x8) + WASM_LOAD_STORE(LOAD_EXTEND_S_I32x4) + WASM_LOAD_STORE(LOAD_EXTEND_U_I32x4) + WASM_LOAD_STORE(LOAD_EXTEND_S_I64x2) + WASM_LOAD_STORE(LOAD_EXTEND_U_I64x2) + WASM_LOAD_STORE(LOAD_ZERO_I64x2) + WASM_LOAD_STORE(LOAD_LANE_I64x2) + WASM_LOAD_STORE(STORE_LANE_I64x2) + return 3; WASM_LOAD_STORE(LOAD_V128) WASM_LOAD_STORE(STORE_V128) return 4; @@ -294,8 +323,10 @@ inline bool isArgument(unsigned Opc) { case WebAssembly::ARGUMENT_v4f32_S: case WebAssembly::ARGUMENT_v2f64: case WebAssembly::ARGUMENT_v2f64_S: - case WebAssembly::ARGUMENT_exnref: - case WebAssembly::ARGUMENT_exnref_S: + case WebAssembly::ARGUMENT_funcref: + case WebAssembly::ARGUMENT_funcref_S: + case WebAssembly::ARGUMENT_externref: + case WebAssembly::ARGUMENT_externref_S: return true; default: return false; @@ -314,8 +345,10 @@ inline bool isCopy(unsigned Opc) { case WebAssembly::COPY_F64_S: case WebAssembly::COPY_V128: case WebAssembly::COPY_V128_S: - case WebAssembly::COPY_EXNREF: - case WebAssembly::COPY_EXNREF_S: + case WebAssembly::COPY_FUNCREF: + case WebAssembly::COPY_FUNCREF_S: + case WebAssembly::COPY_EXTERNREF: + case WebAssembly::COPY_EXTERNREF_S: return true; default: return false; @@ -334,8 +367,10 @@ inline bool isTee(unsigned Opc) { case WebAssembly::TEE_F64_S: case WebAssembly::TEE_V128: case WebAssembly::TEE_V128_S: - case WebAssembly::TEE_EXNREF: - case WebAssembly::TEE_EXNREF_S: + case WebAssembly::TEE_FUNCREF: + case WebAssembly::TEE_FUNCREF_S: + case WebAssembly::TEE_EXTERNREF: + case WebAssembly::TEE_EXTERNREF_S: return true; default: return false; @@ -398,6 +433,18 @@ inline bool isMarker(unsigned Opc) { } } +inline bool isCatch(unsigned Opc) { + switch (Opc) { + case 
WebAssembly::CATCH: + case WebAssembly::CATCH_S: + case WebAssembly::CATCH_ALL: + case WebAssembly::CATCH_ALL_S: + return true; + default: + return false; + } +} + } // end namespace WebAssembly } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index e954eeaebb14..652d7a00a63c 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -71,8 +71,17 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) { assert(Sym->isGlobal()); OS << "\t.globaltype\t" << Sym->getName() << ", " << WebAssembly::typeToString( - static_cast<wasm::ValType>(Sym->getGlobalType().Type)) - << '\n'; + static_cast<wasm::ValType>(Sym->getGlobalType().Type)); + if (!Sym->getGlobalType().Mutable) + OS << ", immutable"; + OS << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitTableType(const MCSymbolWasm *Sym) { + assert(Sym->isTable()); + OS << "\t.tabletype\t" << Sym->getName() << ", " + << WebAssembly::typeToString(Sym->getTableType()); + OS << '\n'; } void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index d6fba05c9986..75c9fb4e289d 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -39,6 +39,8 @@ public: virtual void emitIndIdx(const MCExpr *Value) = 0; /// .globaltype virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0; + /// .tabletype + virtual void emitTableType(const 
MCSymbolWasm *Sym) = 0; /// .eventtype virtual void emitEventType(const MCSymbolWasm *Sym) = 0; /// .import_module @@ -67,6 +69,7 @@ public: void emitFunctionType(const MCSymbolWasm *Sym) override; void emitIndIdx(const MCExpr *Value) override; void emitGlobalType(const MCSymbolWasm *Sym) override; + void emitTableType(const MCSymbolWasm *Sym) override; void emitEventType(const MCSymbolWasm *Sym) override; void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override; void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override; @@ -83,6 +86,7 @@ public: void emitFunctionType(const MCSymbolWasm *Sym) override {} void emitIndIdx(const MCExpr *Value) override; void emitGlobalType(const MCSymbolWasm *Sym) override {} + void emitTableType(const MCSymbolWasm *Sym) override {} void emitEventType(const MCSymbolWasm *Sym) override {} void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override {} @@ -103,6 +107,7 @@ public: void emitFunctionType(const MCSymbolWasm *) override {} void emitIndIdx(const MCExpr *) override {} void emitGlobalType(const MCSymbolWasm *) override {} + void emitTableType(const MCSymbolWasm *) override {} void emitEventType(const MCSymbolWasm *) override {} void emitImportModule(const MCSymbolWasm *, StringRef) override {} void emitImportName(const MCSymbolWasm *, StringRef) override {} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 779e921c1d94..aa7e2311d240 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -76,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_WASM_TBREL: assert(SymA.isFunction()); return 
wasm::R_WASM_TABLE_INDEX_REL_SLEB; + case MCSymbolRefExpr::VK_WASM_TLSREL: + return wasm::R_WASM_MEMORY_ADDR_TLS_SLEB; case MCSymbolRefExpr::VK_WASM_MBREL: assert(SymA.isData()); return is64Bit() ? wasm::R_WASM_MEMORY_ADDR_REL_SLEB64 @@ -92,7 +94,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, return wasm::R_WASM_TABLE_INDEX_SLEB; return wasm::R_WASM_MEMORY_ADDR_SLEB; case WebAssembly::fixup_sleb128_i64: - assert(SymA.isData()); + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_SLEB64; return wasm::R_WASM_MEMORY_ADDR_SLEB64; case WebAssembly::fixup_uleb128_i32: if (SymA.isGlobal()) @@ -101,6 +104,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, return wasm::R_WASM_FUNCTION_INDEX_LEB; if (SymA.isEvent()) return wasm::R_WASM_EVENT_INDEX_LEB; + if (SymA.isTable()) + return wasm::R_WASM_TABLE_NUMBER_LEB; return wasm::R_WASM_MEMORY_ADDR_LEB; case WebAssembly::fixup_uleb128_i64: assert(SymA.isData()); @@ -119,6 +124,17 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } return wasm::R_WASM_MEMORY_ADDR_I32; case FK_Data_8: + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_I64; + if (SymA.isGlobal()) + llvm_unreachable("unimplemented R_WASM_GLOBAL_INDEX_I64"); + if (auto Section = static_cast<const MCSectionWasm *>( + getFixupSection(Fixup.getValue()))) { + if (Section->getKind().isText()) + return wasm::R_WASM_FUNCTION_OFFSET_I64; + else if (!Section->isWasmData()) + llvm_unreachable("unimplemented R_WASM_SECTION_OFFSET_I64"); + } assert(SymA.isData()); return wasm::R_WASM_MEMORY_ADDR_I64; default: diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 96fa13d30729..7f1c4bb40a4c 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -49,6 
+49,8 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" extern cl::opt<bool> WasmKeepRegisters; +extern cl::opt<bool> EnableEmException; +extern cl::opt<bool> EnableEmSjLj; //===----------------------------------------------------------------------===// // Helpers. @@ -81,10 +83,92 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() { return static_cast<WebAssemblyTargetStreamer *>(TS); } +// Emscripten exception handling helpers +// +// This converts invoke names generated by LowerEmscriptenEHSjLj to real names +// that are expected by JavaScript glue code. The invoke names generated by +// Emscripten JS glue code are based on their argument and return types; for +// example, for a function that takes an i32 and returns nothing, it is +// 'invoke_vi'. But the format of invoke generated by LowerEmscriptenEHSjLj pass +// contains a mangled string generated from their IR types, for example, +// "__invoke_void_%struct.mystruct*_int", because final wasm types are not +// available in the IR pass. So we convert those names to the form that +// Emscripten JS code expects. +// +// Refer to LowerEmscriptenEHSjLj pass for more details. + +// Returns true if the given function name is an invoke name generated by +// LowerEmscriptenEHSjLj pass. +static bool isEmscriptenInvokeName(StringRef Name) { + if (Name.front() == '"' && Name.back() == '"') + Name = Name.substr(1, Name.size() - 2); + return Name.startswith("__invoke_"); +} + +// Returns a character that represents the given wasm value type in invoke +// signatures. 
+static char getInvokeSig(wasm::ValType VT) { + switch (VT) { + case wasm::ValType::I32: + return 'i'; + case wasm::ValType::I64: + return 'j'; + case wasm::ValType::F32: + return 'f'; + case wasm::ValType::F64: + return 'd'; + case wasm::ValType::V128: + return 'V'; + case wasm::ValType::FUNCREF: + return 'F'; + case wasm::ValType::EXTERNREF: + return 'X'; + } + llvm_unreachable("Unhandled wasm::ValType enum"); +} + +// Given the wasm signature, generate the invoke name in the format JS glue code +// expects. +static std::string getEmscriptenInvokeSymbolName(wasm::WasmSignature *Sig) { + assert(Sig->Returns.size() <= 1); + std::string Ret = "invoke_"; + if (!Sig->Returns.empty()) + for (auto VT : Sig->Returns) + Ret += getInvokeSig(VT); + else + Ret += 'v'; + // Invokes' first argument is a pointer to the original function, so skip it + for (unsigned I = 1, E = Sig->Params.size(); I < E; I++) + Ret += getInvokeSig(Sig->Params[I]); + return Ret; +} + //===----------------------------------------------------------------------===// // WebAssemblyAsmPrinter Implementation. 
//===----------------------------------------------------------------------===// +MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction( + const Function *F, bool EnableEmEH, wasm::WasmSignature *Sig, + bool &InvokeDetected) { + MCSymbolWasm *WasmSym = nullptr; + if (EnableEmEH && isEmscriptenInvokeName(F->getName())) { + assert(Sig); + InvokeDetected = true; + if (Sig->Returns.size() > 1) { + std::string Msg = + "Emscripten EH/SjLj does not support multivalue returns: " + + std::string(F->getName()) + ": " + + WebAssembly::signatureToString(Sig); + report_fatal_error(Msg); + } + WasmSym = cast<MCSymbolWasm>( + GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig))); + } else { + WasmSym = cast<MCSymbolWasm>(getSymbol(F)); + } + return WasmSym; +} + void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { for (auto &It : OutContext.getSymbols()) { // Emit a .globaltype and .eventtype declaration. @@ -95,6 +179,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { getTargetStreamer()->emitEventType(Sym); } + DenseSet<MCSymbol *> InvokeSymbols; for (const auto &F : M) { if (F.isIntrinsic()) continue; @@ -104,31 +189,46 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) { SmallVector<MVT, 4> Results; SmallVector<MVT, 4> Params; computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results); - auto *Sym = cast<MCSymbolWasm>(getSymbol(&F)); + // At this point these MCSymbols may or may not have been created already + // and thus also contain a signature, but we need to get the signature + // anyway here in case it is an invoke that has not yet been created. We + // will discard it later if it turns out not to be necessary. + auto Signature = signatureFromMVTs(Results, Params); + bool InvokeDetected = false; + auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj, + Signature.get(), InvokeDetected); + + // Multiple functions can be mapped to the same invoke symbol. 
For + // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32' + // are both mapped to 'invoke_vi'. We keep them in a set once we emit an + // Emscripten EH symbol so we don't emit the same symbol twice. + if (InvokeDetected && !InvokeSymbols.insert(Sym).second) + continue; + Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); if (!Sym->getSignature()) { - auto Signature = signatureFromMVTs(Results, Params); Sym->setSignature(Signature.get()); addSignature(std::move(Signature)); + } else { + // This symbol has already been created and had a signature. Discard it. + Signature.reset(); } - // FIXME: this was originally intended for post-linking and was only used - // for imports that were only called indirectly (i.e. s2wasm could not - // infer the type from a call). With object files it applies to all - // imports. so fix the names and the tests, or rethink how import - // delcarations work in asm files. + getTargetStreamer()->emitFunctionType(Sym); - if (TM.getTargetTriple().isOSBinFormatWasm() && - F.hasFnAttribute("wasm-import-module")) { + if (F.hasFnAttribute("wasm-import-module")) { StringRef Name = F.getFnAttribute("wasm-import-module").getValueAsString(); Sym->setImportModule(storeName(Name)); getTargetStreamer()->emitImportModule(Sym, Name); } - if (TM.getTargetTriple().isOSBinFormatWasm() && - F.hasFnAttribute("wasm-import-name")) { + if (F.hasFnAttribute("wasm-import-name")) { + // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use + // the original function name but the converted symbol name. StringRef Name = - F.getFnAttribute("wasm-import-name").getValueAsString(); + InvokeDetected + ? 
Sym->getName() + : F.getFnAttribute("wasm-import-name").getValueAsString(); Sym->setImportName(storeName(Name)); getTargetStreamer()->emitImportName(Sym, Name); } @@ -304,7 +404,6 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() { addSignature(std::move(Signature)); WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - // FIXME: clean up how params and results are emitted (use signatures) getTargetStreamer()->emitFunctionType(WasmSym); // Emit the function index. @@ -362,14 +461,6 @@ void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) { // This is a compiler barrier that prevents instruction reordering during // backend compilation, and should not be emitted. break; - case WebAssembly::EXTRACT_EXCEPTION_I32: - case WebAssembly::EXTRACT_EXCEPTION_I32_S: - // These are pseudo instructions that simulates popping values from stack. - // We print these only when we have -wasm-keep-registers on for assembly - // readability. - if (!WasmKeepRegisters) - break; - LLVM_FALLTHROUGH; default: { WebAssemblyMCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h index d9281568638d..7a6a3247a19f 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h @@ -77,6 +77,9 @@ public: MVT getRegType(unsigned RegNo) const; std::string regToString(const MachineOperand &MO); WebAssemblyTargetStreamer *getTargetStreamer(); + MCSymbolWasm *getMCSymbolForFunction(const Function *F, bool EnableEmEH, + wasm::WasmSignature *Sig, + bool &InvokeDetected); }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 8442b49e25f4..eb3e9b91d40d 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -19,6 +19,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyExceptionInfo.h" +#include "WebAssemblySortRegion.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" @@ -31,6 +32,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +using WebAssembly::SortRegion; +using WebAssembly::SortRegionInfo; #define DEBUG_TYPE "wasm-cfg-sort" @@ -44,78 +47,6 @@ static cl::opt<bool> WasmDisableEHPadSort( namespace { -// Wrapper for loops and exceptions -class Region { -public: - virtual ~Region() = default; - virtual MachineBasicBlock *getHeader() const = 0; - virtual bool contains(const MachineBasicBlock *MBB) const = 0; - virtual unsigned getNumBlocks() const = 0; - using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; - virtual iterator_range<block_iterator> blocks() const = 0; - virtual bool isLoop() const = 0; -}; - -template <typename T> class ConcreteRegion : public Region { - const T *Region; - -public: - ConcreteRegion(const T *Region) : Region(Region) {} - MachineBasicBlock *getHeader() const override { return Region->getHeader(); } - bool contains(const MachineBasicBlock *MBB) const override { - return Region->contains(MBB); - } - unsigned getNumBlocks() const override { return Region->getNumBlocks(); } - iterator_range<block_iterator> blocks() const override { - return Region->blocks(); - } - bool isLoop() const override { return false; } -}; - -template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; } - -// This class has information of nested Regions; this is analogous to what -// LoopInfo is for loops. 
-class RegionInfo { - const MachineLoopInfo &MLI; - const WebAssemblyExceptionInfo &WEI; - DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap; - DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap; - -public: - RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI) - : MLI(MLI), WEI(WEI) {} - - // Returns a smallest loop or exception that contains MBB - const Region *getRegionFor(const MachineBasicBlock *MBB) { - const auto *ML = MLI.getLoopFor(MBB); - const auto *WE = WEI.getExceptionFor(MBB); - if (!ML && !WE) - return nullptr; - // We determine subregion relationship by domination of their headers, i.e., - // if region A's header dominates region B's header, B is a subregion of A. - // WebAssemblyException contains BBs in all its subregions (loops or - // exceptions), but MachineLoop may not, because MachineLoop does not contain - // BBs that don't have a path to its header even if they are dominated by - // its header. So here we should use WE->contains(ML->getHeader()), but not - // ML->contains(WE->getHeader()). - if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) { - // If the smallest region containing MBB is a loop - if (LoopMap.count(ML)) - return LoopMap[ML].get(); - LoopMap[ML] = std::make_unique<ConcreteRegion<MachineLoop>>(ML); - return LoopMap[ML].get(); - } else { - // If the smallest region containing MBB is an exception - if (ExceptionMap.count(WE)) - return ExceptionMap[WE].get(); - ExceptionMap[WE] = - std::make_unique<ConcreteRegion<WebAssemblyException>>(WE); - return ExceptionMap[WE].get(); - } - } -}; - class WebAssemblyCFGSort final : public MachineFunctionPass { StringRef getPassName() const override { return "WebAssembly CFG Sort"; } @@ -236,14 +167,14 @@ struct CompareBlockNumbersBackwards { /// Bookkeeping for a region to help ensure that we don't mix blocks not /// dominated by the its header among its blocks. 
struct Entry { - const Region *TheRegion; + const SortRegion *TheRegion; unsigned NumBlocksLeft; /// List of blocks not dominated by Loop's header that are deferred until /// after all of Loop's blocks have been seen. std::vector<MachineBasicBlock *> Deferred; - explicit Entry(const class Region *R) + explicit Entry(const SortRegion *R) : TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {} }; } // end anonymous namespace @@ -287,10 +218,10 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, CompareBlockNumbersBackwards> Ready; - RegionInfo RI(MLI, WEI); + SortRegionInfo SRI(MLI, WEI); SmallVector<Entry, 4> Entries; for (MachineBasicBlock *MBB = &MF.front();;) { - const Region *R = RI.getRegionFor(MBB); + const SortRegion *R = SRI.getRegionFor(MBB); if (R) { // If MBB is a region header, add it to the active region list. We can't // put any blocks that it doesn't dominate until we see the end of the @@ -373,7 +304,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, MF.RenumberBlocks(); #ifndef NDEBUG - SmallSetVector<const Region *, 8> OnStack; + SmallSetVector<const SortRegion *, 8> OnStack; // Insert a sentinel representing the degenerate loop that starts at the // function entry block and includes the entire function as a "loop" that @@ -382,7 +313,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, for (auto &MBB : MF) { assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); - const Region *Region = RI.getRegionFor(&MBB); + const SortRegion *Region = SRI.getRegionFor(&MBB); if (Region && &MBB == Region->getHeader()) { // Region header. 
@@ -408,10 +339,10 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, for (auto Pred : MBB.predecessors()) assert(Pred->getNumber() < MBB.getNumber() && "Non-loop-header predecessors should be topologically sorted"); - assert(OnStack.count(RI.getRegionFor(&MBB)) && + assert(OnStack.count(SRI.getRegionFor(&MBB)) && "Blocks must be nested in their regions"); } - while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back())) + while (OnStack.size() > 1 && &MBB == SRI.getBottom(OnStack.back())) OnStack.pop_back(); } assert(OnStack.pop_back_val() == nullptr && diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 8cbfc98e8197..a8e0c3efea0e 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -24,6 +24,7 @@ #include "WebAssembly.h" #include "WebAssemblyExceptionInfo.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySortRegion.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +34,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; +using WebAssembly::SortRegionInfo; #define DEBUG_TYPE "wasm-cfg-stackify" @@ -55,6 +57,11 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass { // which holds the beginning of the scope. This will allow us to quickly skip // over scoped regions when walking blocks. SmallVector<MachineBasicBlock *, 8> ScopeTops; + void updateScopeTops(MachineBasicBlock *Begin, MachineBasicBlock *End) { + int EndNo = End->getNumber(); + if (!ScopeTops[EndNo] || ScopeTops[EndNo]->getNumber() > Begin->getNumber()) + ScopeTops[EndNo] = Begin; + } // Placing markers. 
void placeMarkers(MachineFunction &MF); @@ -133,10 +140,10 @@ static bool explicitlyBranchesTo(MachineBasicBlock *Pred, // contains instructions that should go before the marker, and AfterSet contains // ones that should go after the marker. In this function, AfterSet is only // used for sanity checking. +template <typename Container> static MachineBasicBlock::iterator -getEarliestInsertPos(MachineBasicBlock *MBB, - const SmallPtrSet<const MachineInstr *, 4> &BeforeSet, - const SmallPtrSet<const MachineInstr *, 4> &AfterSet) { +getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet, + const Container &AfterSet) { auto InsertPos = MBB->end(); while (InsertPos != MBB->begin()) { if (BeforeSet.count(&*std::prev(InsertPos))) { @@ -157,10 +164,10 @@ getEarliestInsertPos(MachineBasicBlock *MBB, // contains instructions that should go before the marker, and AfterSet contains // ones that should go after the marker. In this function, BeforeSet is only // used for sanity checking. +template <typename Container> static MachineBasicBlock::iterator -getLatestInsertPos(MachineBasicBlock *MBB, - const SmallPtrSet<const MachineInstr *, 4> &BeforeSet, - const SmallPtrSet<const MachineInstr *, 4> &AfterSet) { +getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet, + const Container &AfterSet) { auto InsertPos = MBB->begin(); while (InsertPos != MBB->end()) { if (AfterSet.count(&*InsertPos)) { @@ -219,20 +226,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { // which reduces overall stack height. MachineBasicBlock *Header = nullptr; bool IsBranchedTo = false; - bool IsBrOnExn = false; - MachineInstr *BrOnExn = nullptr; int MBBNumber = MBB.getNumber(); for (MachineBasicBlock *Pred : MBB.predecessors()) { if (Pred->getNumber() < MBBNumber) { Header = Header ? 
MDT.findNearestCommonDominator(Header, Pred) : Pred; - if (explicitlyBranchesTo(Pred, &MBB)) { + if (explicitlyBranchesTo(Pred, &MBB)) IsBranchedTo = true; - if (Pred->getFirstTerminator()->getOpcode() == WebAssembly::BR_ON_EXN) { - IsBrOnExn = true; - assert(!BrOnExn && "There should be only one br_on_exn per block"); - BrOnExn = &*Pred->getFirstTerminator(); - } - } } } if (!Header) @@ -317,22 +316,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { } // Add the BLOCK. - - // 'br_on_exn' extracts exnref object and pushes variable number of values - // depending on its tag. For C++ exception, its a single i32 value, and the - // generated code will be in the form of: - // block i32 - // br_on_exn 0, $__cpp_exception - // rethrow - // end_block WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void; - if (IsBrOnExn) { - const char *TagName = BrOnExn->getOperand(1).getSymbolName(); - if (std::strcmp(TagName, "__cpp_exception") != 0) - llvm_unreachable("Only C++ exception is supported"); - ReturnType = WebAssembly::BlockType::I32; - } - auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos), @@ -372,16 +356,15 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { registerScope(Begin, End); // Track the farthest-spanning scope that ends at this point. - int Number = MBB.getNumber(); - if (!ScopeTops[Number] || - ScopeTops[Number]->getNumber() > Header->getNumber()) - ScopeTops[Number] = Header; + updateScopeTops(Header, &MBB); } /// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 
void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { MachineFunction &MF = *MBB.getParent(); const auto &MLI = getAnalysis<MachineLoopInfo>(); + const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>(); + SortRegionInfo SRI(MLI, WEI); const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); MachineLoop *Loop = MLI.getLoopFor(&MBB); @@ -390,7 +373,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { // The operand of a LOOP is the first block after the loop. If the loop is the // bottom of the function, insert a dummy block at the end. - MachineBasicBlock *Bottom = WebAssembly::getBottom(Loop); + MachineBasicBlock *Bottom = SRI.getBottom(Loop); auto Iter = std::next(Bottom->getIterator()); if (Iter == MF.end()) { getAppendixBlock(MF); @@ -441,8 +424,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { assert((!ScopeTops[AfterLoop->getNumber()] || ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && "With block sorting the outermost loop for a block should be first."); - if (!ScopeTops[AfterLoop->getNumber()]) - ScopeTops[AfterLoop->getNumber()] = &MBB; + updateScopeTops(&MBB, AfterLoop); } void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { @@ -450,7 +432,9 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { MachineFunction &MF = *MBB.getParent(); auto &MDT = getAnalysis<MachineDominatorTree>(); const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const auto &MLI = getAnalysis<MachineLoopInfo>(); const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>(); + SortRegionInfo SRI(MLI, WEI); const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); // Compute the nearest common dominator of all unwind predecessors @@ -470,7 +454,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { // end. 
WebAssemblyException *WE = WEI.getExceptionFor(&MBB); assert(WE); - MachineBasicBlock *Bottom = WebAssembly::getBottom(WE); + MachineBasicBlock *Bottom = SRI.getBottom(WE); auto Iter = std::next(Bottom->getIterator()); if (Iter == MF.end()) { @@ -639,11 +623,8 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { // catch | // end_block --| // end_try - for (int Number : {Cont->getNumber(), MBB.getNumber()}) { - if (!ScopeTops[Number] || - ScopeTops[Number]->getNumber() > Header->getNumber()) - ScopeTops[Number] = Header; - } + for (auto *End : {&MBB, Cont}) + updateScopeTops(Header, End); } void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { @@ -656,11 +637,32 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { // try // ... // br bb2 <- Not necessary - // bb1: + // bb1 (ehpad): + // catch + // ... + // bb2: <- Continuation BB + // end + // + // A more involved case: When the BB where 'end' is located is another EH + // pad, the Cont (= continuation) BB is that EH pad's 'end' BB. For example, + // bb0: + // try + // try + // ... + // br bb3 <- Not necessary + // bb1 (ehpad): + // catch + // bb2 (ehpad): + // end // catch // ... - // bb2: + // bb3: <- Continuation BB // end + // + // When the EH pad at hand is bb1, its matching end_try is in bb2. But it is + // another EH pad, so bb0's continuation BB becomes bb3. So 'br bb3' in the + // code can be deleted. This is why we run 'while' until 'Cont' is not an EH + // pad. 
for (auto &MBB : MF) { if (!MBB.isEHPad()) continue; @@ -668,7 +670,14 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode(); - MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent(); + + MachineBasicBlock *Cont = &MBB; + while (Cont->isEHPad()) { + MachineInstr *Try = EHPadToTry[Cont]; + MachineInstr *EndTry = BeginToEnd[Try]; + Cont = EndTry->getParent(); + } + bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond); // This condition means either // 1. This BB ends with a single unconditional branch whose destinaion is @@ -745,18 +754,26 @@ static unsigned getCopyOpcode(const TargetRegisterClass *RC) { return WebAssembly::COPY_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::COPY_V128; - if (RC == &WebAssembly::EXNREFRegClass) - return WebAssembly::COPY_EXNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return WebAssembly::COPY_FUNCREF; + if (RC == &WebAssembly::EXTERNREFRegClass) + return WebAssembly::COPY_EXTERNREF; llvm_unreachable("Unexpected register class"); } // When MBB is split into MBB and Split, we should unstackify defs in MBB that // have their uses in Split. -static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, - MachineBasicBlock &Split, - WebAssemblyFunctionInfo &MFI, - MachineRegisterInfo &MRI, - const WebAssemblyInstrInfo &TII) { +// FIXME This function will be used when fixing unwind mismatches, but the old +// version of that function was removed for the moment and the new version has +// not yet been added. So 'LLVM_ATTRIBUTE_UNUSED' is added to suppress the +// warning. Remove the attribute after the new functionality is added. 
+LLVM_ATTRIBUTE_UNUSED static void +unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, MachineBasicBlock &Split) { + MachineFunction &MF = *MBB.getParent(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + auto &MRI = MF.getRegInfo(); + for (auto &MI : Split) { for (auto &MO : MI.explicit_uses()) { if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg())) @@ -810,525 +827,8 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, } bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { - const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Linearizing the control flow by placing TRY / END_TRY markers can create - // mismatches in unwind destinations. There are two kinds of mismatches we - // try to solve here. - - // 1. When an instruction may throw, but the EH pad it will unwind to can be - // different from the original CFG. - // - // Example: we have the following CFG: - // bb0: - // call @foo (if it throws, unwind to bb2) - // bb1: - // call @bar (if it throws, unwind to bb3) - // bb2 (ehpad): - // catch - // ... - // bb3 (ehpad) - // catch - // handler body - // - // And the CFG is sorted in this order. Then after placing TRY markers, it - // will look like: (BB markers are omitted) - // try $label1 - // try - // call @foo - // call @bar (if it throws, unwind to bb3) - // catch <- ehpad (bb2) - // ... - // end_try - // catch <- ehpad (bb3) - // handler body - // end_try - // - // Now if bar() throws, it is going to end up ip in bb2, not bb3, where it - // is supposed to end up. We solve this problem by - // a. Split the target unwind EH pad (here bb3) so that the handler body is - // right after 'end_try', which means we extract the handler body out of - // the catch block. 
We do this because this handler body should be - // somewhere branch-eable from the inner scope. - // b. Wrap the call that has an incorrect unwind destination ('call @bar' - // here) with a nested try/catch/end_try scope, and within the new catch - // block, branches to the handler body. - // c. Place a branch after the newly inserted nested end_try so it can bypass - // the handler body, which is now outside of a catch block. - // - // The result will like as follows. (new: a) means this instruction is newly - // created in the process of doing 'a' above. - // - // block $label0 (new: placeBlockMarker) - // try $label1 - // try - // call @foo - // try (new: b) - // call @bar - // catch (new: b) - // local.set n / drop (new: b) - // br $label1 (new: b) - // end_try (new: b) - // catch <- ehpad (bb2) - // end_try - // br $label0 (new: c) - // catch <- ehpad (bb3) - // end_try (hoisted: a) - // handler body - // end_block (new: placeBlockMarker) - // - // Note that the new wrapping block/end_block will be generated later in - // placeBlockMarker. - // - // TODO Currently local.set and local.gets are generated to move exnref value - // created by catches. That's because we don't support yielding values from a - // block in LLVM machine IR yet, even though it is supported by wasm. Delete - // unnecessary local.get/local.sets once yielding values from a block is - // supported. The full EH spec requires multi-value support to do this, but - // for C++ we don't yet need it because we only throw a single i32. - // - // --- - // 2. The same as 1, but in this case an instruction unwinds to a caller - // function and not another EH pad. - // - // Example: we have the following CFG: - // bb0: - // call @foo (if it throws, unwind to bb2) - // bb1: - // call @bar (if it throws, unwind to caller) - // bb2 (ehpad): - // catch - // ... - // - // And the CFG is sorted in this order. 
Then after placing TRY markers, it - // will look like: - // try - // call @foo - // call @bar (if it throws, unwind to caller) - // catch <- ehpad (bb2) - // ... - // end_try - // - // Now if bar() throws, it is going to end up ip in bb2, when it is supposed - // throw up to the caller. - // We solve this problem by - // a. Create a new 'appendix' BB at the end of the function and put a single - // 'rethrow' instruction (+ local.get) in there. - // b. Wrap the call that has an incorrect unwind destination ('call @bar' - // here) with a nested try/catch/end_try scope, and within the new catch - // block, branches to the new appendix block. - // - // block $label0 (new: placeBlockMarker) - // try - // call @foo - // try (new: b) - // call @bar - // catch (new: b) - // local.set n (new: b) - // br $label0 (new: b) - // end_try (new: b) - // catch <- ehpad (bb2) - // ... - // end_try - // ... - // end_block (new: placeBlockMarker) - // local.get n (new: a) <- appendix block - // rethrow (new: a) - // - // In case there are multiple calls in a BB that may throw to the caller, they - // can be wrapped together in one nested try scope. (In 1, this couldn't - // happen, because may-throwing instruction there had an unwind destination, - // i.e., it was an invoke before, and there could be only one invoke within a - // BB.) 
- - SmallVector<const MachineBasicBlock *, 8> EHPadStack; - // Range of intructions to be wrapped in a new nested try/catch - using TryRange = std::pair<MachineInstr *, MachineInstr *>; - // In original CFG, <unwind destination BB, a vector of try ranges> - DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges; - // In new CFG, <destination to branch to, a vector of try ranges> - DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges; - // In new CFG, <destination to branch to, register containing exnref> - DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg; - - // Destinations for branches that will be newly added, for which a new - // BLOCK/END_BLOCK markers are necessary. - SmallVector<MachineBasicBlock *, 8> BrDests; - - // Gather possibly throwing calls (i.e., previously invokes) whose current - // unwind destination is not the same as the original CFG. - for (auto &MBB : reverse(MF)) { - bool SeenThrowableInstInBB = false; - for (auto &MI : reverse(MBB)) { - if (MI.getOpcode() == WebAssembly::TRY) - EHPadStack.pop_back(); - else if (MI.getOpcode() == WebAssembly::CATCH) - EHPadStack.push_back(MI.getParent()); - - // In this loop we only gather calls that have an EH pad to unwind. So - // there will be at most 1 such call (= invoke) in a BB, so after we've - // seen one, we can skip the rest of BB. Also if MBB has no EH pad - // successor or MI does not throw, this is not an invoke. - if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() || - !WebAssembly::mayThrow(MI)) - continue; - SeenThrowableInstInBB = true; - - // If the EH pad on the stack top is where this instruction should unwind - // next, we're good. - MachineBasicBlock *UnwindDest = nullptr; - for (auto *Succ : MBB.successors()) { - if (Succ->isEHPad()) { - UnwindDest = Succ; - break; - } - } - if (EHPadStack.back() == UnwindDest) - continue; - - // If not, record the range. 
- UnwindDestToTryRanges[UnwindDest].push_back(TryRange(&MI, &MI)); - } - } - - assert(EHPadStack.empty()); - - // Gather possibly throwing calls that are supposed to unwind up to the caller - // if they throw, but currently unwind to an incorrect destination. Unlike the - // loop above, there can be multiple calls within a BB that unwind to the - // caller, which we should group together in a range. - bool NeedAppendixBlock = false; - for (auto &MBB : reverse(MF)) { - MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive - for (auto &MI : reverse(MBB)) { - if (MI.getOpcode() == WebAssembly::TRY) - EHPadStack.pop_back(); - else if (MI.getOpcode() == WebAssembly::CATCH) - EHPadStack.push_back(MI.getParent()); - - // If MBB has an EH pad successor, this inst does not unwind to caller. - if (MBB.hasEHPadSuccessor()) - continue; - - // We wrap up the current range when we see a marker even if we haven't - // finished a BB. - if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) { - NeedAppendixBlock = true; - // Record the range. nullptr here means the unwind destination is the - // caller. - UnwindDestToTryRanges[nullptr].push_back( - TryRange(RangeBegin, RangeEnd)); - RangeBegin = RangeEnd = nullptr; // Reset range pointers - } - - // If EHPadStack is empty, that means it is correctly unwind to caller if - // it throws, so we're good. If MI does not throw, we're good too. - if (EHPadStack.empty() || !WebAssembly::mayThrow(MI)) - continue; - - // We found an instruction that unwinds to the caller but currently has an - // incorrect unwind destination. Create a new range or increment the - // currently existing range. - if (!RangeEnd) - RangeBegin = RangeEnd = &MI; - else - RangeBegin = &MI; - } - - if (RangeEnd) { - NeedAppendixBlock = true; - // Record the range. nullptr here means the unwind destination is the - // caller. 
- UnwindDestToTryRanges[nullptr].push_back(TryRange(RangeBegin, RangeEnd)); - RangeBegin = RangeEnd = nullptr; // Reset range pointers - } - } - - assert(EHPadStack.empty()); - // We don't have any unwind destination mismatches to resolve. - if (UnwindDestToTryRanges.empty()) - return false; - - // If we found instructions that should unwind to the caller but currently - // have incorrect unwind destination, we create an appendix block at the end - // of the function with a local.get and a rethrow instruction. - if (NeedAppendixBlock) { - auto *AppendixBB = getAppendixBlock(MF); - Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); - BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW)) - .addReg(ExnReg); - // These instruction ranges should branch to this appendix BB. - for (auto Range : UnwindDestToTryRanges[nullptr]) - BrDestToTryRanges[AppendixBB].push_back(Range); - BrDestToExnReg[AppendixBB] = ExnReg; - } - - // We loop through unwind destination EH pads that are targeted from some - // inner scopes. Because these EH pads are destination of more than one scope - // now, we split them so that the handler body is after 'end_try'. - // - Before - // ehpad: - // catch - // local.set n / drop - // handler body - // ... - // cont: - // end_try - // - // - After - // ehpad: - // catch - // local.set n / drop - // brdest: (new) - // end_try (hoisted from 'cont' BB) - // handler body (taken from 'ehpad') - // ... - // cont: - for (auto &P : UnwindDestToTryRanges) { - NumUnwindMismatches += P.second.size(); - - // This means the destination is the appendix BB, which was separately - // handled above. - if (!P.first) - continue; - - MachineBasicBlock *EHPad = P.first; - - // Find 'catch' and 'local.set' or 'drop' instruction that follows the - // 'catch'. 
If -wasm-disable-explicit-locals is not set, 'catch' should be - // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is - // generated after 'catch' in LateEHPrepare and we don't support blocks - // taking values yet. - MachineInstr *Catch = nullptr; - unsigned ExnReg = 0; - for (auto &MI : *EHPad) { - switch (MI.getOpcode()) { - case WebAssembly::CATCH: - Catch = &MI; - ExnReg = Catch->getOperand(0).getReg(); - break; - } - } - assert(Catch && "EH pad does not have a catch"); - assert(ExnReg != 0 && "Invalid register"); - - auto SplitPos = std::next(Catch->getIterator()); - - // Create a new BB that's gonna be the destination for branches from the - // inner mismatched scope. - MachineInstr *BeginTry = EHPadToTry[EHPad]; - MachineInstr *EndTry = BeginToEnd[BeginTry]; - MachineBasicBlock *Cont = EndTry->getParent(); - auto *BrDest = MF.CreateMachineBasicBlock(); - MF.insert(std::next(EHPad->getIterator()), BrDest); - // Hoist up the existing 'end_try'. - BrDest->insert(BrDest->end(), EndTry->removeFromParent()); - // Take out the handler body from EH pad to the new branch destination BB. - BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end()); - unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI, TII); - // Fix predecessor-successor relationship. - BrDest->transferSuccessors(EHPad); - EHPad->addSuccessor(BrDest); - - // All try ranges that were supposed to unwind to this EH pad now have to - // branch to this new branch dest BB. - for (auto Range : UnwindDestToTryRanges[EHPad]) - BrDestToTryRanges[BrDest].push_back(Range); - BrDestToExnReg[BrDest] = ExnReg; - - // In case we fall through to the continuation BB after the catch block, we - // now have to add a branch to it. - // - Before - // try - // ... - // (falls through to 'cont') - // catch - // handler body - // end - // <-- cont - // - // - After - // try - // ... 
- // br %cont (new) - // catch - // end - // handler body - // <-- cont - MachineBasicBlock *EHPadLayoutPred = &*std::prev(EHPad->getIterator()); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond); - if (Analyzable && !TBB && !FBB) { - DebugLoc DL = EHPadLayoutPred->empty() - ? DebugLoc() - : EHPadLayoutPred->rbegin()->getDebugLoc(); - BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont); - BrDests.push_back(Cont); - } - } - - // For possibly throwing calls whose unwind destinations are currently - // incorrect because of CFG linearization, we wrap them with a nested - // try/catch/end_try, and within the new catch block, we branch to the correct - // handler. - // - Before - // mbb: - // call @foo <- Unwind destination mismatch! - // ehpad: - // ... - // - // - After - // mbb: - // try (new) - // call @foo - // nested-ehpad: (new) - // catch (new) - // local.set n / drop (new) - // br %brdest (new) - // nested-end: (new) - // end_try (new) - // ehpad: - // ... 
- for (auto &P : BrDestToTryRanges) { - MachineBasicBlock *BrDest = P.first; - auto &TryRanges = P.second; - unsigned ExnReg = BrDestToExnReg[BrDest]; - - for (auto Range : TryRanges) { - MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; - std::tie(RangeBegin, RangeEnd) = Range; - auto *MBB = RangeBegin->getParent(); - // Store the first function call from this range, because RangeBegin can - // be moved to point EH_LABEL before the call - MachineInstr *RangeBeginCall = RangeBegin; - - // Include possible EH_LABELs in the range - if (RangeBegin->getIterator() != MBB->begin() && - std::prev(RangeBegin->getIterator())->isEHLabel()) - RangeBegin = &*std::prev(RangeBegin->getIterator()); - if (std::next(RangeEnd->getIterator()) != MBB->end() && - std::next(RangeEnd->getIterator())->isEHLabel()) - RangeEnd = &*std::next(RangeEnd->getIterator()); - - MachineBasicBlock *EHPad = nullptr; - for (auto *Succ : MBB->successors()) { - if (Succ->isEHPad()) { - EHPad = Succ; - break; - } - } - - // Local expression tree before the first call of this range should go - // after the nested TRY. - SmallPtrSet<const MachineInstr *, 4> AfterSet; - AfterSet.insert(RangeBegin); - AfterSet.insert(RangeBeginCall); - for (auto I = MachineBasicBlock::iterator(RangeBeginCall), - E = MBB->begin(); - I != E; --I) { - if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) - continue; - if (WebAssembly::isChild(*std::prev(I), MFI)) - AfterSet.insert(&*std::prev(I)); - else - break; - } - - // Create the nested try instruction. - auto InsertPos = getLatestInsertPos( - MBB, SmallPtrSet<const MachineInstr *, 4>(), AfterSet); - MachineInstr *NestedTry = - BuildMI(*MBB, InsertPos, RangeBegin->getDebugLoc(), - TII.get(WebAssembly::TRY)) - .addImm(int64_t(WebAssembly::BlockType::Void)); - - // Create the nested EH pad and fill instructions in. 
- MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock(); - MF.insert(std::next(MBB->getIterator()), NestedEHPad); - NestedEHPad->setIsEHPad(); - NestedEHPad->setIsEHScopeEntry(); - BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::CATCH), - ExnReg); - BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::BR)) - .addMBB(BrDest); - - // Create the nested continuation BB and end_try instruction. - MachineBasicBlock *NestedCont = MF.CreateMachineBasicBlock(); - MF.insert(std::next(NestedEHPad->getIterator()), NestedCont); - MachineInstr *NestedEndTry = - BuildMI(*NestedCont, NestedCont->begin(), RangeEnd->getDebugLoc(), - TII.get(WebAssembly::END_TRY)); - // In case MBB has more instructions after the try range, move them to the - // new nested continuation BB. - NestedCont->splice(NestedCont->end(), MBB, - std::next(RangeEnd->getIterator()), MBB->end()); - unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI, TII); - registerTryScope(NestedTry, NestedEndTry, NestedEHPad); - - // Fix predecessor-successor relationship. - NestedCont->transferSuccessors(MBB); - if (EHPad) { - NestedCont->removeSuccessor(EHPad); - // If EHPad does not have any predecessors left after removing - // NextedCont predecessor, remove its successor too, because this EHPad - // is not reachable from the entry BB anyway. We can't remove EHPad BB - // itself because it can contain 'catch' or 'end', which are necessary - // for keeping try-catch-end structure. - if (EHPad->pred_empty()) - EHPad->removeSuccessor(BrDest); - } - MBB->addSuccessor(NestedEHPad); - MBB->addSuccessor(NestedCont); - NestedEHPad->addSuccessor(BrDest); - } - } - - // Renumber BBs and recalculate ScopeTop info because new BBs might have been - // created and inserted above. 
- MF.RenumberBlocks(); - ScopeTops.clear(); - ScopeTops.resize(MF.getNumBlockIDs()); - for (auto &MBB : reverse(MF)) { - for (auto &MI : reverse(MBB)) { - if (ScopeTops[MBB.getNumber()]) - break; - switch (MI.getOpcode()) { - case WebAssembly::END_BLOCK: - case WebAssembly::END_LOOP: - case WebAssembly::END_TRY: - ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent(); - break; - case WebAssembly::CATCH: - ScopeTops[MBB.getNumber()] = EHPadToTry[&MBB]->getParent(); - break; - } - } - } - - // Recompute the dominator tree. - getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF); - - // Place block markers for newly added branches, if necessary. - - // If we've created an appendix BB and a branch to it, place a block/end_block - // marker for that. For some new branches, those branch destination BBs start - // with a hoisted end_try marker, so we don't need a new marker there. - if (AppendixBB) - BrDests.push_back(AppendixBB); - - llvm::sort(BrDests, - [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { - auto ANum = A->getNumber(); - auto BNum = B->getNumber(); - return ANum < BNum; - }); - for (auto *Dest : BrDests) - placeBlockMarker(*Dest); - - return true; + // TODO Implement this + return false; } static unsigned @@ -1365,22 +865,44 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { : WebAssembly::BlockType( WebAssembly::toValType(MFI.getResults().front())); - for (MachineBasicBlock &MBB : reverse(MF)) { - for (MachineInstr &MI : reverse(MBB)) { + SmallVector<MachineBasicBlock::reverse_iterator, 4> Worklist; + Worklist.push_back(MF.rbegin()->rbegin()); + + auto Process = [&](MachineBasicBlock::reverse_iterator It) { + auto *MBB = It->getParent(); + while (It != MBB->rend()) { + MachineInstr &MI = *It++; if (MI.isPosition() || MI.isDebugInstr()) continue; switch (MI.getOpcode()) { + case WebAssembly::END_TRY: { + // If a 'try''s return type is fixed, both its try body and catch body + // should satisfy the return 
type, so we need to search 'end' + // instructions before its corresponding 'catch' too. + auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]); + assert(EHPad); + auto NextIt = + std::next(WebAssembly::findCatch(EHPad)->getReverseIterator()); + if (NextIt != EHPad->rend()) + Worklist.push_back(NextIt); + LLVM_FALLTHROUGH; + } case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: - case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; default: - // Something other than an `end`. We're done. + // Something other than an `end`. We're done for this BB. return; } } - } + // We've reached the beginning of a BB. Continue the search in the previous + // BB. + Worklist.push_back(MBB->getPrevNode()->rbegin()); + }; + + while (!Worklist.empty()) + Process(Worklist.pop_back_val()); } // WebAssembly functions end with an end instruction, as if the function body diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp index 159fb4c00ddc..78191ae758fe 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp @@ -20,7 +20,19 @@ using namespace llvm; WebAssemblyDebugValueManager::WebAssemblyDebugValueManager( MachineInstr *Instr) { - Instr->collectDebugValues(DbgValues); + // This code differs from MachineInstr::collectDebugValues in that it scans + // the whole BB, not just contiguous DBG_VALUEs. 
+ if (!Instr->getOperand(0).isReg()) + return; + + MachineBasicBlock::iterator DI = *Instr; + ++DI; + for (MachineBasicBlock::iterator DE = Instr->getParent()->end(); DI != DE; + ++DI) { + if (DI->isDebugValue() && + DI->getDebugOperandForReg(Instr->getOperand(0).getReg())) + DbgValues.push_back(&*DI); + } } void WebAssemblyDebugValueManager::move(MachineInstr *Insert) { diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 55925bcbe771..ac94e9e80d01 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -96,8 +96,10 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) { return WebAssembly::DROP_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::DROP_V128; - if (RC == &WebAssembly::EXNREFRegClass) - return WebAssembly::DROP_EXNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return WebAssembly::DROP_FUNCREF; + if (RC == &WebAssembly::EXTERNREFRegClass) + return WebAssembly::DROP_EXTERNREF; llvm_unreachable("Unexpected register class"); } @@ -113,8 +115,10 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_GET_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_GET_V128; - if (RC == &WebAssembly::EXNREFRegClass) - return WebAssembly::LOCAL_GET_EXNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return WebAssembly::LOCAL_GET_FUNCREF; + if (RC == &WebAssembly::EXTERNREFRegClass) + return WebAssembly::LOCAL_GET_EXTERNREF; llvm_unreachable("Unexpected register class"); } @@ -130,8 +134,10 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_SET_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_SET_V128; - if (RC == &WebAssembly::EXNREFRegClass) - return 
WebAssembly::LOCAL_SET_EXNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return WebAssembly::LOCAL_SET_FUNCREF; + if (RC == &WebAssembly::EXTERNREFRegClass) + return WebAssembly::LOCAL_SET_EXTERNREF; llvm_unreachable("Unexpected register class"); } @@ -147,8 +153,10 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_TEE_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_TEE_V128; - if (RC == &WebAssembly::EXNREFRegClass) - return WebAssembly::LOCAL_TEE_EXNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return WebAssembly::LOCAL_TEE_FUNCREF; + if (RC == &WebAssembly::EXTERNREFRegClass) + return WebAssembly::LOCAL_TEE_EXTERNREF; llvm_unreachable("Unexpected register class"); } @@ -164,8 +172,10 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { return MVT::f64; if (RC == &WebAssembly::V128RegClass) return MVT::v16i8; - if (RC == &WebAssembly::EXNREFRegClass) - return MVT::exnref; + if (RC == &WebAssembly::FUNCREFRegClass) + return MVT::funcref; + if (RC == &WebAssembly::EXTERNREFRegClass) + return MVT::externref; llvm_unreachable("unrecognized register class"); } @@ -221,6 +231,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { auto Local = static_cast<unsigned>(MI.getOperand(1).getImm()); Reg2Local[Reg] = Local; checkFrameBase(MFI, Local, Reg); + + // Update debug value to point to the local before removing. 
+ WebAssemblyDebugValueManager(&MI).replaceWithLocal(Local); + MI.eraseFromParent(); Changed = true; } diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index c2a0d3e01740..82b032267d55 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -20,12 +20,14 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" +#include "WebAssemblyUtilities.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -128,7 +130,8 @@ private: case MVT::i64: case MVT::f32: case MVT::f64: - case MVT::exnref: + case MVT::funcref: + case MVT::externref: return VT; case MVT::f16: return MVT::f32; @@ -704,9 +707,13 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_v2f64; RC = &WebAssembly::V128RegClass; break; - case MVT::exnref: - Opc = WebAssembly::ARGUMENT_exnref; - RC = &WebAssembly::EXNREFRegClass; + case MVT::funcref: + Opc = WebAssembly::ARGUMENT_funcref; + RC = &WebAssembly::FUNCREFRegClass; + break; + case MVT::externref: + Opc = WebAssembly::ARGUMENT_externref; + RC = &WebAssembly::EXTERNREFRegClass; break; default: return false; @@ -806,8 +813,11 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { case MVT::v2f64: ResultReg = createResultReg(&WebAssembly::V128RegClass); break; - case MVT::exnref: - ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); + case MVT::funcref: + ResultReg = 
createResultReg(&WebAssembly::FUNCREFRegClass); + break; + case MVT::externref: + ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass); break; default: return false; @@ -862,6 +872,15 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { // Add placeholders for the type index and immediate flags MIB.addImm(0); MIB.addImm(0); + + // Ensure that the object file has a __indirect_function_table import, as we + // call_indirect against it. + MCSymbolWasm *Sym = WebAssembly::getOrCreateFunctionTableSymbol( + MF->getMMI().getContext(), "__indirect_function_table"); + // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark + // it as NO_STRIP so as to ensure that the indirect function table makes it + // to linked output. + Sym->setNoStrip(); } for (unsigned ArgReg : Args) @@ -916,9 +935,13 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) { Opc = WebAssembly::SELECT_F64; RC = &WebAssembly::F64RegClass; break; - case MVT::exnref: - Opc = WebAssembly::SELECT_EXNREF; - RC = &WebAssembly::EXNREFRegClass; + case MVT::funcref: + Opc = WebAssembly::SELECT_FUNCREF; + RC = &WebAssembly::FUNCREFRegClass; + break; + case MVT::externref: + Opc = WebAssembly::SELECT_EXTERNREF; + RC = &WebAssembly::EXTERNREFRegClass; break; default: return false; @@ -1321,7 +1344,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: - case MVT::exnref: + case MVT::funcref: + case MVT::externref: break; default: return false; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp index 7f805b34b499..52aa3534c78e 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp @@ -41,13 +41,51 @@ public: char WebAssemblyFixBrTableDefaults::ID = 0; +// 
Target indepedent selection dag assumes that it is ok to use PointerTy +// as the index for a "switch", whereas Wasm so far only has a 32-bit br_table. +// See e.g. SelectionDAGBuilder::visitJumpTableHeader +// We have a 64-bit br_table in the tablegen defs as a result, which does get +// selected, and thus we get incorrect truncates/extensions happening on +// wasm64. Here we fix that. +void fixBrTableIndex(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { + // Only happens on wasm64. + auto &WST = MF.getSubtarget<WebAssemblySubtarget>(); + if (!WST.hasAddr64()) + return; + + assert(MI.getDesc().getOpcode() == WebAssembly::BR_TABLE_I64 && + "64-bit br_table pseudo instruction expected"); + + // Find extension op, if any. It sits in the previous BB before the branch. + auto ExtMI = MF.getRegInfo().getVRegDef(MI.getOperand(0).getReg()); + if (ExtMI->getOpcode() == WebAssembly::I64_EXTEND_U_I32) { + // Unnecessarily extending a 32-bit value to 64, remove it. + assert(MI.getOperand(0).getReg() == ExtMI->getOperand(0).getReg()); + MI.getOperand(0).setReg(ExtMI->getOperand(1).getReg()); + ExtMI->eraseFromParent(); + } else { + // Incoming 64-bit value that needs to be truncated. + Register Reg32 = + MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), + WST.getInstrInfo()->get(WebAssembly::I32_WRAP_I64), Reg32) + .addReg(MI.getOperand(0).getReg()); + MI.getOperand(0).setReg(Reg32); + } + + // We now have a 32-bit operand in all cases, so change the instruction + // accordingly. + MI.setDesc(WST.getInstrInfo()->get(WebAssembly::BR_TABLE_I32)); +} + // `MI` is a br_table instruction with a dummy default target argument. This // function finds and adds the default target argument and removes any redundant // range check preceding the br_table. Returns the MBB that the br_table is // moved into so it can be removed from further consideration, or nullptr if the // br_table cannot be optimized. 
-MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB, - MachineFunction &MF) { +MachineBasicBlock *fixBrTableDefault(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { // Get the header block, which contains the redundant range check. assert(MBB->pred_size() == 1 && "Expected a single guard predecessor"); auto *HeaderMBB = *MBB->pred_begin(); @@ -125,7 +163,8 @@ bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) { MBBSet.erase(MBB); for (auto &MI : *MBB) { if (WebAssembly::isBrTable(MI)) { - auto *Fixed = fixBrTable(MI, MBB, MF); + fixBrTableIndex(MI, MBB, MF); + auto *Fixed = fixBrTableDefault(MI, MBB, MF); if (Fixed != nullptr) { MBBSet.erase(Fixed); Changed = true; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index dee1c4e28149..d75afdcefb7d 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -29,7 +29,12 @@ HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) HANDLE_NODETYPE(VEC_SHR_S) HANDLE_NODETYPE(VEC_SHR_U) +HANDLE_NODETYPE(WIDEN_LOW_S) +HANDLE_NODETYPE(WIDEN_LOW_U) +HANDLE_NODETYPE(WIDEN_HIGH_S) +HANDLE_NODETYPE(WIDEN_HIGH_U) HANDLE_NODETYPE(THROW) +HANDLE_NODETYPE(CATCH) HANDLE_NODETYPE(MEMORY_COPY) HANDLE_NODETYPE(MEMORY_FILL) diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index d1a696f854f8..b9154b09fbbc 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -80,9 +80,6 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); auto GlobalGetIns = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 : WebAssembly::GLOBAL_GET_I32; - auto ConstIns = - PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; - auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32; // Few custom selection stuff. SDLoc DL(Node); @@ -126,41 +123,6 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { return; } - case ISD::GlobalTLSAddress: { - const auto *GA = cast<GlobalAddressSDNode>(Node); - - if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory()) - report_fatal_error("cannot use thread-local storage without bulk memory", - false); - - // Currently Emscripten does not support dynamic linking with threads. - // Therefore, if we have thread-local storage, only the local-exec model - // is possible. - // TODO: remove this and implement proper TLS models once Emscripten - // supports dynamic linking with threads. - if (GA->getGlobal()->getThreadLocalMode() != - GlobalValue::LocalExecTLSModel && - !Subtarget->getTargetTriple().isOSEmscripten()) { - report_fatal_error("only -ftls-model=local-exec is supported for now on " - "non-Emscripten OSes: variable " + - GA->getGlobal()->getName(), - false); - } - - SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT); - SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress( - GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0); - - MachineSDNode *TLSBase = - CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym); - MachineSDNode *TLSOffset = - CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym); - MachineSDNode *TLSAddress = CurDAG->getMachineNode( - AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); - ReplaceNode(Node, TLSAddress); - return; - } - case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); switch (IntNo) { diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 925636c82321..e348bba2b04c 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -16,6 +16,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" +#include "WebAssemblyUtilities.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -31,6 +32,7 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -68,6 +70,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( computeRegisterProperties(Subtarget->getRegisterInfo()); setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom); setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); setOperationAction(ISD::BlockAddress, MVTPtr, Custom); @@ -123,6 +126,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Hoist bitcasts out of shuffles setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + // Combine extends of extract_subvectors into widening ops + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + // Support saturating add for i8x16 and i16x8 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) @@ -156,11 +163,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // There is no i8x16.mul instruction setOperationAction(ISD::MUL, MVT::v16i8, Expand); - // There are no vector select instructions - for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) - for (auto T : {MVT::v16i8, MVT::v8i16, 
MVT::v4i32, MVT::v4f32, MVT::v2i64, - MVT::v2f64}) - setOperationAction(Op, T, Expand); + // There is no vector conditional select instruction + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) + setOperationAction(ISD::SELECT_CC, T, Expand); // Expand integer operations supported for scalars but not SIMD for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, @@ -247,6 +253,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); } + // And some truncating stores are legal as well + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); } // Don't do anything clever with build_pairs @@ -258,6 +267,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Exception handling intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setMaxAtomicSizeInBitsSupported(64); @@ -268,7 +278,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Define the emscripten name for return address helper. - // TODO: when implementing other WASM backends, make this generic or only do + // TODO: when implementing other Wasm backends, make this generic or only do // this on emscripten depending on what they end up doing. setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address"); @@ -442,6 +452,19 @@ static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults, const MCInstrDesc &MCID = TII.get(CallOp); MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL)); + // See if we must truncate the function pointer. + // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers + // as 64-bit for uniformity with other pointer types. 
+ if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) { + Register Reg32 = + MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + auto &FnPtr = CallParams.getOperand(0); + BuildMI(*BB, CallResults.getIterator(), DL, + TII.get(WebAssembly::I32_WRAP_I64), Reg32) + .addReg(FnPtr.getReg()); + FnPtr.setReg(Reg32); + } + // Move the function pointer to the end of the arguments for indirect calls if (IsIndirect) { auto FnPtr = CallParams.getOperand(0); @@ -456,6 +479,15 @@ static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults, if (IsIndirect) { MIB.addImm(0); MIB.addImm(0); + + // Ensure that the object file has a __indirect_function_table import, as we + // call_indirect against it. + MCSymbolWasm *Sym = WebAssembly::getOrCreateFunctionTableSymbol( + MF.getContext(), "__indirect_function_table"); + // Until call_indirect emits TABLE_NUMBER relocs against this symbol, mark + // it as NO_STRIP so as to ensure that the indirect function table makes it + // to linked output. + Sym->setNoStrip(); } for (auto Use : CallParams.uses()) @@ -542,6 +574,16 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( if (VT.getSizeInBits() <= 64) return std::make_pair(0U, &WebAssembly::I64RegClass); } + if (VT.isFloatingPoint() && !VT.isVector()) { + switch (VT.getSizeInBits()) { + case 32: + return std::make_pair(0U, &WebAssembly::F32RegClass); + case 64: + return std::make_pair(0U, &WebAssembly::F64RegClass); + default: + break; + } + } break; default: break; @@ -626,7 +668,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { - case Intrinsic::wasm_atomic_notify: + case Intrinsic::wasm_memory_atomic_notify: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); @@ -640,7 +682,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // consistent. 
The same applies for wasm_atomic_wait intrinsics too. Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; - case Intrinsic::wasm_atomic_wait_i32: + case Intrinsic::wasm_memory_atomic_wait32: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); @@ -648,7 +690,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = Align(4); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; - case Intrinsic::wasm_atomic_wait_i64: + case Intrinsic::wasm_memory_atomic_wait64: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); @@ -656,6 +698,75 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = Align(8); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; + case Intrinsic::wasm_load32_zero: + case Intrinsic::wasm_load64_zero: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Info.memVT == MVT::i32 ? 
Align(4) : Align(8); + Info.flags = MachineMemOperand::MOLoad; + return true; + case Intrinsic::wasm_load8_lane: + case Intrinsic::wasm_load16_lane: + case Intrinsic::wasm_load32_lane: + case Intrinsic::wasm_load64_lane: + case Intrinsic::wasm_store8_lane: + case Intrinsic::wasm_store16_lane: + case Intrinsic::wasm_store32_lane: + case Intrinsic::wasm_store64_lane: { + MVT MemVT; + Align MemAlign; + switch (Intrinsic) { + case Intrinsic::wasm_load8_lane: + case Intrinsic::wasm_store8_lane: + MemVT = MVT::i8; + MemAlign = Align(1); + break; + case Intrinsic::wasm_load16_lane: + case Intrinsic::wasm_store16_lane: + MemVT = MVT::i16; + MemAlign = Align(2); + break; + case Intrinsic::wasm_load32_lane: + case Intrinsic::wasm_store32_lane: + MemVT = MVT::i32; + MemAlign = Align(4); + break; + case Intrinsic::wasm_load64_lane: + case Intrinsic::wasm_store64_lane: + MemVT = MVT::i64; + MemAlign = Align(8); + break; + default: + llvm_unreachable("unexpected intrinsic"); + } + if (Intrinsic == Intrinsic::wasm_load8_lane || + Intrinsic == Intrinsic::wasm_load16_lane || + Intrinsic == Intrinsic::wasm_load32_lane || + Intrinsic == Intrinsic::wasm_load64_lane) { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.flags = MachineMemOperand::MOLoad; + } else { + Info.opc = ISD::INTRINSIC_VOID; + Info.flags = MachineMemOperand::MOStore; + } + Info.ptrVal = I.getArgOperand(0); + Info.memVT = MemVT; + Info.offset = 0; + Info.align = MemAlign; + return true; + } + case Intrinsic::wasm_prefetch_t: + case Intrinsic::wasm_prefetch_nt: { + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::i8; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Align(1); + Info.flags = MachineMemOperand::MOLoad; + return true; + } default: return false; } @@ -866,8 +977,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, /*isSS=*/false); unsigned ValNo = 0; SmallVector<SDValue, 8> Chains; - for (SDValue Arg : - make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + for 
(SDValue Arg : drop_begin(OutVals, NumFixedArgs)) { assert(ArgLocs[ValNo].getValNo() == ValNo && "ArgLocs should remain in order and only hold varargs args"); unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); @@ -876,7 +986,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getConstant(Offset, DL, PtrVT)); Chains.push_back( DAG.getStore(Chain, DL, Arg, Add, - MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); + MachinePointerInfo::getFixedStack(MF, FI, Offset))); } if (!Chains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); @@ -1091,6 +1201,8 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::JumpTable: @@ -1199,6 +1311,49 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); } +SDValue +WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const auto *GA = cast<GlobalAddressSDNode>(Op); + MVT PtrVT = getPointerTy(DAG.getDataLayout()); + + MachineFunction &MF = DAG.getMachineFunction(); + if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory()) + report_fatal_error("cannot use thread-local storage without bulk memory", + false); + + const GlobalValue *GV = GA->getGlobal(); + + // Currently Emscripten does not support dynamic linking with threads. + // Therefore, if we have thread-local storage, only the local-exec model + // is possible. + // TODO: remove this and implement proper TLS models once Emscripten + // supports dynamic linking with threads. 
+ if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel && + !Subtarget->getTargetTriple().isOSEmscripten()) { + report_fatal_error("only -ftls-model=local-exec is supported for now on " + "non-Emscripten OSes: variable " + + GV->getName(), + false); + } + + auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; + const char *BaseName = MF.createExternalSymbolName("__tls_base"); + + SDValue BaseAddr( + DAG.getMachineNode(GlobalGet, DL, PtrVT, + DAG.getTargetExternalSymbol(BaseName, PtrVT)), + 0); + + SDValue TLSOffset = DAG.getTargetGlobalAddress( + GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); + SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset); + + return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr); +} + SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1303,7 +1458,22 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, MFI->getVarargBufferVreg(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), - MachinePointerInfo(SV), 0); + MachinePointerInfo(SV)); +} + +static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex, + SelectionDAG &DAG) { + // We only support C++ exceptions for now + int Tag = + cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue(); + if (Tag != WebAssembly::CPP_EXCEPTION) + llvm_unreachable("Invalid tag: We only support C++ exceptions for now"); + auto &MF = DAG.getMachineFunction(); + const auto &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + const char *SymName = MF.createExternalSymbolName("__cpp_exception"); + return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT, + DAG.getTargetExternalSymbol(SymName, PtrVT)); } SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, @@ -1339,15 +1509,7 @@ SDValue 
WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, } case Intrinsic::wasm_throw: { - // We only support C++ exceptions for now - int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - if (Tag != CPP_EXCEPTION) - llvm_unreachable("Invalid tag!"); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); - const char *SymName = MF.createExternalSymbolName("__cpp_exception"); - SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, - DAG.getTargetExternalSymbol(SymName, PtrVT)); + SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG); return DAG.getNode(WebAssemblyISD::THROW, DL, MVT::Other, // outchain type { @@ -1357,6 +1519,19 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, }); } + case Intrinsic::wasm_catch: { + SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG); + return DAG.getNode(WebAssemblyISD::CATCH, DL, + { + MVT::i32, // outchain type + MVT::Other // return value + }, + { + Op.getOperand(0), // inchain + SymNode // exception symbol + }); + } + case Intrinsic::wasm_shuffle: { // Drop in-chain and replace undefs, but otherwise pass through unchanged SDValue Ops[18]; @@ -1474,8 +1649,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, SmallVector<SwizzleEntry, 16> SwizzleCounts; auto AddCount = [](auto &Counts, const auto &Val) { - auto CountIt = std::find_if(Counts.begin(), Counts.end(), - [&Val](auto E) { return E.first == Val; }); + auto CountIt = + llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; }); if (CountIt == Counts.end()) { Counts.emplace_back(Val, 1); } else { @@ -1537,6 +1712,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, }; } else if (NumConstantLanes >= NumSplatLanes && Subtarget->hasUnimplementedSIMD128()) { + // If we support v128.const, emit it directly SmallVector<SDValue, 16> ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1548,11 +1724,59 @@ 
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } Result = DAG.getBuildVector(VecT, DL, ConstLanes); - IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { return IsConstant(Lane); }; - } - if (!Result) { + } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) { + // Otherwise, if this is an integer vector, pack the lane values together so + // we can construct the 128-bit constant from a pair of i64s using a splat + // followed by at most one i64x2.replace_lane. Also keep track of the lanes + // that actually matter so we can avoid the replace_lane in more cases. + std::array<uint64_t, 2> I64s{{0, 0}}; + std::array<uint64_t, 2> ConstLaneMasks{{0, 0}}; + size_t LaneBits = 128 / Lanes; + size_t HalfLanes = Lanes / 2; + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op.getOperand(I); + if (IsConstant(Lane)) { + // How much we need to shift Val to position it in an i64 + auto Shift = LaneBits * (I % HalfLanes); + auto Mask = maskTrailingOnes<uint64_t>(LaneBits); + auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask; + I64s[I / HalfLanes] |= Val << Shift; + ConstLaneMasks[I / HalfLanes] |= Mask << Shift; + } + } + // Check whether all constant lanes in the second half of the vector are + // equivalent in the first half or vice versa to determine whether splatting + // either side will be sufficient to materialize the constant. As a special + // case, if the first and second halves have no constant lanes in common, we + // can just combine them. 
+ bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1]; + bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0]; + bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0; + + uint64_t Splatted; + if (SecondHalfSufficient) { + Splatted = I64s[1]; + } else if (CombinedSufficient) { + Splatted = I64s[0] | I64s[1]; + } else { + Splatted = I64s[0]; + } + + Result = DAG.getSplatBuildVector(MVT::v2i64, DL, + DAG.getConstant(Splatted, DL, MVT::i64)); + if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) { + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result, + DAG.getConstant(I64s[1], DL, MVT::i64), + DAG.getConstant(1, DL, MVT::i32)); + } + Result = DAG.getBitcast(VecT, Result); + IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; + } else { // Use a splat, but possibly a load_splat LoadSDNode *SplattedLoad; if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && @@ -1565,11 +1789,14 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, } else { Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); } - IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { return Lane == SplatValue; }; } + assert(Result); + assert(IsLaneConstructed); + // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); @@ -1730,6 +1957,49 @@ performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return DAG.getBitcast(DstType, NewShuffle); } +static SDValue performVectorWidenCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + auto &DAG = DCI.DAG; + assert(N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND); + + // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if + // possible before the extract_subvector can be expanded. 
+ auto Extract = N->getOperand(0); + if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + auto Source = Extract.getOperand(0); + auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); + if (IndexNode == nullptr) + return SDValue(); + auto Index = IndexNode->getZExtValue(); + + // Only v8i8 and v4i16 extracts can be widened, and only if the extracted + // subvector is the low or high half of its source. + EVT ResVT = N->getValueType(0); + if (ResVT == MVT::v8i16) { + if (Extract.getValueType() != MVT::v8i8 || + Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8)) + return SDValue(); + } else if (ResVT == MVT::v4i32) { + if (Extract.getValueType() != MVT::v4i16 || + Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4)) + return SDValue(); + } else { + return SDValue(); + } + + bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND; + bool IsLow = Index == 0; + + unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S + : WebAssemblyISD::WIDEN_HIGH_S) + : (IsLow ? 
WebAssemblyISD::WIDEN_LOW_U + : WebAssemblyISD::WIDEN_HIGH_U); + + return DAG.getNode(Op, SDLoc(N), ResVT, Source); +} + SDValue WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -1738,5 +2008,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); case ISD::VECTOR_SHUFFLE: return performVECTOR_SHUFFLECombine(N, DCI); + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return performVectorWidenCombine(N, DCI); } } diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b8e612377529..c8a052d01199 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -106,6 +106,7 @@ private: SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 256b77e33db9..22103b0bfb38 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -33,112 +33,117 @@ multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "", //===----------------------------------------------------------------------===// let hasSideEffects = 1 in { -defm ATOMIC_NOTIFY_A32 : +defm MEMORY_ATOMIC_NOTIFY_A32 : ATOMIC_I<(outs I32:$dst), (ins 
P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", - "atomic.notify \t${off}${p2align}", 0x00, "false">; -defm ATOMIC_NOTIFY_A64 : + "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", + "memory.atomic.notify \t${off}${p2align}", 0x00, "false">; +defm MEMORY_ATOMIC_NOTIFY_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", - "atomic.notify \t${off}${p2align}", 0x00, "true">; + "memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", + "memory.atomic.notify \t${off}${p2align}", 0x00, "true">; let mayLoad = 1 in { -defm ATOMIC_WAIT_I32_A32 : +defm MEMORY_ATOMIC_WAIT32_A32 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i32.atomic.wait \t${off}${p2align}", 0x01, "false">; -defm ATOMIC_WAIT_I32_A64 : + "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "memory.atomic.wait32 \t${off}${p2align}", 0x01, "false">; +defm MEMORY_ATOMIC_WAIT32_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i32.atomic.wait \t${off}${p2align}", 0x01, "true">; -defm ATOMIC_WAIT_I64_A32 : + "memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "memory.atomic.wait32 \t${off}${p2align}", 0x01, "true">; +defm MEMORY_ATOMIC_WAIT64_A32 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, 
offset32_op:$off), [], - "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i64.atomic.wait \t${off}${p2align}", 0x02, "false">; -defm ATOMIC_WAIT_I64_A64 : + "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "memory.atomic.wait64 \t${off}${p2align}", 0x02, "false">; +defm MEMORY_ATOMIC_WAIT64_A64 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i64.atomic.wait \t${off}${p2align}", 0x02, "true">; + "memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "memory.atomic.wait64 \t${off}${p2align}", 0x02, "true">; } // mayLoad = 1 } // hasSideEffects = 1 -let Predicates = [HasAtomics] in { // Select notifys with no constant offset. def NotifyPatNoOffset_A32 : - Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)), - (ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>, - Requires<[HasAddr32]>; + Pat<(i32 (int_wasm_memory_atomic_notify I32:$addr, I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>, + Requires<[HasAddr32, HasAtomics]>; def NotifyPatNoOffset_A64 : - Pat<(i32 (int_wasm_atomic_notify I64:$addr, I32:$count)), - (ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>, - Requires<[HasAddr64]>; + Pat<(i32 (int_wasm_memory_atomic_notify I64:$addr, I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>, + Requires<[HasAddr64, HasAtomics]>; // Select notifys with a constant offset. 
// Pattern with address + immediate offset multiclass NotifyPatImmOff<PatFrag operand, string inst> { - def : Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), + def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I32:$addr, imm:$off), I32:$count)), (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, I32:$count)>, - Requires<[HasAddr32]>; - def : Pat<(i32 (int_wasm_atomic_notify (operand I64:$addr, imm:$off), + Requires<[HasAddr32, HasAtomics]>; + def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I64:$addr, imm:$off), I32:$count)), (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, I32:$count)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } -defm : NotifyPatImmOff<regPlusImm, "ATOMIC_NOTIFY">; -defm : NotifyPatImmOff<or_is_add, "ATOMIC_NOTIFY">; +defm : NotifyPatImmOff<regPlusImm, "MEMORY_ATOMIC_NOTIFY">; +defm : NotifyPatImmOff<or_is_add, "MEMORY_ATOMIC_NOTIFY">; // Select notifys with just a constant offset. def NotifyPatOffsetOnly_A32 : - Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), - (ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>, - Requires<[HasAddr32]>; + Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>, + Requires<[HasAddr32, HasAtomics]>; def NotifyPatOffsetOnly_A64 : - Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), - (ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>, - Requires<[HasAddr64]>; + Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>, + Requires<[HasAddr64, HasAtomics]>; def NotifyPatGlobalAddrOffOnly_A32 : - Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off), - I32:$count)), - (ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>, - Requires<[HasAddr32]>; + Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off), + I32:$count)), + 
(MEMORY_ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count) + >, + Requires<[HasAddr32, HasAtomics, IsNotPIC]>; def NotifyPatGlobalAddrOffOnly_A64 : - Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off), - I32:$count)), - (ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)>, - Requires<[HasAddr64]>; + Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off), + I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count) + >, + Requires<[HasAddr64, HasAtomics, IsNotPIC]>; // Select waits with no constant offset. multiclass WaitPatNoOffset<ValueType ty, Intrinsic kind, string inst> { def : Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(i32 (kind I64:$addr, ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } -defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; -defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; -defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; -defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; +defm : WaitPatNoOffset<i32, int_wasm_memory_atomic_wait32, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatNoOffset<i64, int_wasm_memory_atomic_wait64, + "MEMORY_ATOMIC_WAIT64">; +defm : WaitPatNoOffset<i32, int_wasm_memory_atomic_wait32, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatNoOffset<i64, int_wasm_memory_atomic_wait64, + "MEMORY_ATOMIC_WAIT64">; // Select waits with a constant offset. 
@@ -148,52 +153,53 @@ multiclass WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, def : Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(i32 (kind (operand I64:$addr, imm:$off), ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } -defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, - "ATOMIC_WAIT_I32">; -defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, - "ATOMIC_WAIT_I32">; -defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, - "ATOMIC_WAIT_I64">; -defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, - "ATOMIC_WAIT_I64">; - -// Select wait_i32, "ATOMIC_WAIT_I32s with just a constant offset. +defm : WaitPatImmOff<i32, int_wasm_memory_atomic_wait32, regPlusImm, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatImmOff<i32, int_wasm_memory_atomic_wait32, or_is_add, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatImmOff<i64, int_wasm_memory_atomic_wait64, regPlusImm, + "MEMORY_ATOMIC_WAIT64">; +defm : WaitPatImmOff<i64, int_wasm_memory_atomic_wait64, or_is_add, + "MEMORY_ATOMIC_WAIT64">; + +// Select waits with just a constant offset. 
multiclass WaitPatOffsetOnly<ValueType ty, Intrinsic kind, string inst> { def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp, I64:$timeout)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } -defm : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; -defm : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; +defm : WaitPatOffsetOnly<i32, int_wasm_memory_atomic_wait32, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatOffsetOnly<i64, int_wasm_memory_atomic_wait64, + "MEMORY_ATOMIC_WAIT64">; multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> { def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics, IsNotPIC]>; def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)), (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp, I64:$timeout)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics, IsNotPIC]>; } -defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, - "ATOMIC_WAIT_I32">; -defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, - "ATOMIC_WAIT_I64">; -} // Predicates = [HasAtomics] +defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_memory_atomic_wait32, + "MEMORY_ATOMIC_WAIT32">; +defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_memory_atomic_wait64, + "MEMORY_ATOMIC_WAIT64">; //===----------------------------------------------------------------------===// // Atomic fences @@ -221,7 +227,6 @@ defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>; defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 
0x11>; // Select loads with no constant offset. -let Predicates = [HasAtomics] in { defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">; defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">; @@ -240,7 +245,6 @@ defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">; defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">; defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">; -} // Predicates = [HasAtomics] // Extending loads. Note that there are only zero-extending atomic loads, no // sign-extending loads. @@ -285,7 +289,6 @@ def sext_aload_8_64 : def sext_aload_16_64 : PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; -let Predicates = [HasAtomics] in { // Select zero-extending loads with no constant offset. defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">; defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">; @@ -344,7 +347,6 @@ defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">; defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">; defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">; -} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic stores @@ -363,16 +365,15 @@ defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>; // store: (store $val, $ptr) // atomic_store: (store $ptr, $val) -let Predicates = [HasAtomics] in { // Select stores with no constant offset. 
multiclass AStorePatNoOffset<ValueType ty, PatFrag kind, string inst> { def : Pat<(kind I32:$addr, ty:$val), (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(kind I64:$addr, ty:$val), (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } defm : AStorePatNoOffset<i32, atomic_store_32, "ATOMIC_STORE_I32">; defm : AStorePatNoOffset<i64, atomic_store_64, "ATOMIC_STORE_I64">; @@ -384,10 +385,10 @@ multiclass AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, string inst> { def : Pat<(kind (operand I32:$addr, imm:$off), ty:$val), (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(kind (operand I64:$addr, imm:$off), ty:$val), (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } defm : AStorePatImmOff<i32, atomic_store_32, regPlusImm, "ATOMIC_STORE_I32">; defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">; @@ -396,10 +397,10 @@ defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">; multiclass AStorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> { def : Pat<(kind imm:$off, ty:$val), (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(kind imm:$off, ty:$val), (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">; defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">; @@ -407,15 +408,14 @@ defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">; multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> { def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val), 
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics, IsNotPIC]>; def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val), (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics, IsNotPIC]>; } defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">; defm : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">; -} // Predicates = [HasAtomics] // Truncating stores. defm ATOMIC_STORE8_I32 : AtomicStore<I32, "i32.atomic.store8", 0x19>; @@ -436,7 +436,6 @@ def trunc_astore_8_64 : trunc_astore_64<atomic_store_8>; def trunc_astore_16_64 : trunc_astore_64<atomic_store_16>; def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>; -let Predicates = [HasAtomics] in { // Truncating stores with no constant offset defm : AStorePatNoOffset<i32, atomic_store_8, "ATOMIC_STORE8_I32">; @@ -474,7 +473,6 @@ defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">; defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">; defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">; -} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic binary read-modify-writes @@ -580,10 +578,10 @@ defm ATOMIC_RMW32_U_XCHG_I64 : multiclass BinRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> { def : Pat<(ty (kind I32:$addr, ty:$val)), (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(ty (kind I64:$addr, ty:$val)), (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } // Select binary RMWs with a constant offset. 
@@ -593,29 +591,29 @@ multiclass BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, string inst> { def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)), (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$val)), (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } // Select binary RMWs with just a constant offset. multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> { def : Pat<(ty (kind imm:$off, ty:$val)), (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(ty (kind imm:$off, ty:$val)), (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> { def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)), (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics, IsNotPIC]>; def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)), (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics, IsNotPIC]>; } // Patterns for various addressing modes. 
@@ -636,7 +634,6 @@ multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32, defm : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>; } -let Predicates = [HasAtomics] in { defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64, "ATOMIC_RMW_ADD_I32", "ATOMIC_RMW_ADD_I64">; defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64, @@ -649,7 +646,6 @@ defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64, "ATOMIC_RMW_XOR_I32", "ATOMIC_RMW_XOR_I64">; defm : BinRMWPattern<atomic_swap_32, atomic_swap_64, "ATOMIC_RMW_XCHG_I32", "ATOMIC_RMW_XCHG_I64">; -} // Predicates = [HasAtomics] // Truncating & zero-extending binary RMW patterns. // These are combined patterns of truncating store patterns and zero-extending @@ -752,7 +748,6 @@ multiclass BinRMWTruncExtPattern< defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>; } -let Predicates = [HasAtomics] in { defm : BinRMWTruncExtPattern< atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64, "ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32", @@ -778,7 +773,6 @@ defm : BinRMWTruncExtPattern< "ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32", "ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64", "ATOMIC_RMW32_U_XCHG_I64">; -} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic ternary read-modify-writes @@ -827,10 +821,10 @@ defm ATOMIC_RMW32_U_CMPXCHG_I64 : multiclass TerRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> { def : Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)), (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(ty (kind I64:$addr, ty:$exp, ty:$new)), (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } // Select ternary RMWs with a constant offset. 
@@ -840,10 +834,10 @@ multiclass TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, string inst> { def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics]>; def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$exp, ty:$new)), (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics]>; } // Select ternary RMWs with just a constant offset. @@ -860,11 +854,11 @@ multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> { def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)), (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>, - Requires<[HasAddr32]>; + Requires<[HasAddr32, HasAtomics, IsNotPIC]>; def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)), (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp, ty:$new)>, - Requires<[HasAddr64]>; + Requires<[HasAddr64, HasAtomics, IsNotPIC]>; } // Patterns for various addressing modes. 
@@ -885,7 +879,6 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32, defm : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>; } -let Predicates = [HasAtomics] in defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64, "ATOMIC_RMW_CMPXCHG_I32", "ATOMIC_RMW_CMPXCHG_I64">; @@ -994,7 +987,6 @@ multiclass TerRMWTruncExtPattern< defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>; } -let Predicates = [HasAtomics] in defm : TerRMWTruncExtPattern< atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64, "ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32", diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td index 3e9ef6fbc7ea..7aeae54d95a8 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -39,7 +39,7 @@ let mayStore = 1, hasSideEffects = 1 in defm MEMORY_INIT_A#B : BULK_I<(outs), (ins i32imm_op:$seg, i32imm_op:$idx, rc:$dest, - rc:$offset, rc:$size), + I32:$offset, I32:$size), (outs), (ins i32imm_op:$seg, i32imm_op:$idx), [], "memory.init\t$seg, $idx, $dest, $offset, $size", diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 171dd9a67beb..702560bea100 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -103,7 +103,7 @@ defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>; } // isReturn = 1 -let isTrap = 1 in +let IsCanonical = 1, isTrap = 1 in defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>; } // isTerminator = 1 @@ -131,14 +131,11 @@ defm THROW : I<(outs), (ins 
event_op:$tag, variable_ops), (outs), (ins event_op:$tag), [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))], "throw \t$tag", "throw \t$tag", 0x08>; -defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [], - "rethrow \t$exn", "rethrow", 0x09>; -// Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch -// intrinsic. Will be converted to the real rethrow instruction later. -let isPseudo = 1 in -defm RETHROW_IN_CATCH : NRI<(outs), (ins), [(int_wasm_rethrow_in_catch)], - "rethrow_in_catch", 0>; +defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 +// For C++ support, we only rethrow the latest exception, thus always setting +// the depth to 0. +def : Pat<(int_wasm_rethrow), (RETHROW 0)>; // Region within which an exception is caught: try / end_try let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { @@ -146,26 +143,18 @@ defm TRY : NRI<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>; defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] -// Catching an exception: catch / extract_exception -let hasCtrlDep = 1, hasSideEffects = 1 in -defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [], - "catch \t$dst", "catch", 0x07>; - -// Querying / extracing exception: br_on_exn -// br_on_exn queries an exnref to see if it matches the corresponding exception -// tag index. If true it branches to the given label and pushes the -// corresponding argument values of the exception onto the stack. 
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in -defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn), - (outs), (ins bb_op:$dst, event_op:$tag), [], - "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag", - 0x0a>; -// This is a pseudo instruction that simulates popping a value from stack, which -// has been pushed by br_on_exn -let isCodeGenOnly = 1, hasSideEffects = 1 in -defm EXTRACT_EXCEPTION_I32 : NRI<(outs I32:$dst), (ins), - [(set I32:$dst, (int_wasm_extract_exception))], - "extract_exception\t$dst">; +// Catching an exception: catch / catch_all +let hasCtrlDep = 1, hasSideEffects = 1 in { +// Currently 'catch' can only extract an i32, which is sufficient for C++ +// support, but according to the spec 'catch' can extract any number of values +// based on the event type. +defm CATCH : I<(outs I32:$dst), (ins event_op:$tag), + (outs), (ins event_op:$tag), + [(set I32:$dst, + (WebAssemblycatch (WebAssemblywrapper texternalsym:$tag)))], + "catch \t$dst, $tag", "catch \t$tag", 0x07>; +defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>; +} // Pseudo instructions: cleanupret / catchret let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 6fe1fd2b5c5a..db2ad05b4cdf 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -76,8 +76,10 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, CopyOpcode = WebAssembly::COPY_F64; else if (RC == &WebAssembly::V128RegClass) CopyOpcode = WebAssembly::COPY_V128; - else if (RC == &WebAssembly::EXNREFRegClass) - CopyOpcode = WebAssembly::COPY_EXNREF; + else if (RC == &WebAssembly::FUNCREFRegClass) + CopyOpcode = WebAssembly::COPY_FUNCREF; + else if (RC == 
&WebAssembly::EXTERNREFRegClass) + CopyOpcode = WebAssembly::COPY_EXTERNREF; else llvm_unreachable("Unexpected register class"); @@ -139,14 +141,6 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, else FBB = MI.getOperand(0).getMBB(); break; - case WebAssembly::BR_ON_EXN: - if (HaveCond) - return true; - Cond.push_back(MachineOperand::CreateImm(true)); - Cond.push_back(MI.getOperand(2)); - TBB = MI.getOperand(0).getMBB(); - HaveCond = true; - break; } if (MI.isBarrier()) break; @@ -192,24 +186,10 @@ unsigned WebAssemblyInstrInfo::insertBranch( assert(Cond.size() == 2 && "Expected a flag and a successor block"); - MachineFunction &MF = *MBB.getParent(); - auto &MRI = MF.getRegInfo(); - bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) == - &WebAssembly::EXNREFRegClass; - - if (Cond[0].getImm()) { - if (IsBrOnExn) { - const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception"); - BuildMI(&MBB, DL, get(WebAssembly::BR_ON_EXN)) - .addMBB(TBB) - .addExternalSymbol(CPPExnSymbol) - .add(Cond[1]); - } else - BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]); - } else { - assert(!IsBrOnExn && "br_on_exn does not have a reversed condition"); + if (Cond[0].getImm()) + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]); + else BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]); - } if (!FBB) return 1; @@ -220,14 +200,6 @@ unsigned WebAssemblyInstrInfo::insertBranch( bool WebAssemblyInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Expected a flag and a condition expression"); - - // br_on_exn's condition cannot be reversed - MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent(); - auto &MRI = MF.getRegInfo(); - if (Cond[1].isReg() && - MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass) - return true; - Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); return false; 
} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 085910f01ee6..2f5a64a87a59 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -74,8 +74,6 @@ def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; -def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; @@ -83,7 +81,8 @@ def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; -def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, []>; +def SDT_WebAssemblyCatch : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Nodes. @@ -109,6 +108,8 @@ def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC", SDT_WebAssemblyWrapperPIC>; def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow, [SDNPHasChain, SDNPVariadic]>; +def WebAssemblycatch : SDNode<"WebAssemblyISD::CATCH", SDT_WebAssemblyCatch, + [SDNPHasChain, SDNPSideEffect]>; //===----------------------------------------------------------------------===// // WebAssembly-specific Operands. 
@@ -163,6 +164,9 @@ def vec_i64imm_op : Operand<i64>; let OperandType = "OPERAND_FUNCTION32" in def function32_op : Operand<i32>; +let OperandType = "OPERAND_TABLE" in +def table32_op : Operand<i32>; + let OperandType = "OPERAND_OFFSET32" in def offset32_op : Operand<i32>; @@ -184,6 +188,11 @@ def Signature : Operand<i32> { let PrintMethod = "printWebAssemblySignatureOperand"; } +let OperandType = "OPERAND_HEAPTYPE" in +def HeapType : Operand<i32> { + let PrintMethod = "printWebAssemblyHeapTypeOperand"; +} + let OperandType = "OPERAND_TYPEINDEX" in def TypeIndex : Operand<i32>; @@ -236,7 +245,8 @@ defm "": ARGUMENT<I32, i32>; defm "": ARGUMENT<I64, i64>; defm "": ARGUMENT<F32, f32>; defm "": ARGUMENT<F64, f64>; -defm "": ARGUMENT<EXNREF, exnref>; +defm "": ARGUMENT<FUNCREF, funcref>; +defm "": ARGUMENT<EXTERNREF, externref>; // local.get and local.set are not generated by instruction selection; they // are implied by virtual register uses and defs. @@ -306,7 +316,8 @@ defm "" : LOCAL<I64>; defm "" : LOCAL<F32>; defm "" : LOCAL<F64>; defm "" : LOCAL<V128>, Requires<[HasSIMD128]>; -defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>; +defm "" : LOCAL<FUNCREF>, Requires<[HasReferenceTypes]>; +defm "" : LOCAL<EXTERNREF>, Requires<[HasReferenceTypes]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), @@ -333,16 +344,25 @@ def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)), (CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), - (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>; def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)), - (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapperPIC tglobaladdr:$addr)), + (CONST_I64 tglobaladdr:$addr)>, 
Requires<[IsPIC, HasAddr64]>; + +def : Pat<(i32 (WebAssemblywrapper tglobaltlsaddr:$addr)), + (CONST_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapper tglobaltlsaddr:$addr)), + (CONST_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), - (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>; + (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), - (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>; + (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr32]>; +def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)), + (CONST_I64 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr64]>; def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>; def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>; @@ -361,3 +381,4 @@ include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" include "WebAssemblyInstrRef.td" include "WebAssemblyInstrBulkMemory.td" +include "WebAssemblyInstrTable.td" diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index b3c63cc1f884..48b934457267 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -70,7 +70,7 @@ defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>; multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> { def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>, Requires<[HasAddr32]>; - def : Pat<(ty (kind I64:$addr)), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>, + def : Pat<(ty (kind (i64 I64:$addr))), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>, Requires<[HasAddr64]>; } diff --git 
a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index 14d723750f07..7f324fc11210 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -11,15 +11,29 @@ /// //===----------------------------------------------------------------------===// -defm SELECT_EXNREF : I<(outs EXNREF:$dst), - (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond), - (outs), (ins), - [(set EXNREF:$dst, - (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))], - "exnref.select\t$dst, $lhs, $rhs, $cond", - "exnref.select", 0x1b>; +multiclass REF_I<WebAssemblyRegClass reg, ValueType vt> { + defm REF_NULL_#reg : I<(outs reg:$res), (ins HeapType:$heaptype), + (outs), (ins HeapType:$heaptype), + [], + "ref.null\t$res, $heaptype", + "ref.null\t$heaptype", + 0xd0>, + Requires<[HasReferenceTypes]>; + defm SELECT_#reg: I<(outs reg:$dst), (ins reg:$lhs, reg:$rhs, I32:$cond), + (outs), (ins), + [(set reg:$dst, + (select I32:$cond, reg:$lhs, reg:$rhs))], + vt#".select\t$dst, $lhs, $rhs, $cond", + vt#".select", 0x1b>, + Requires<[HasReferenceTypes]>; +} -def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs), - (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>; -def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs), - (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>; +defm "" : REF_I<FUNCREF, funcref>; +defm "" : REF_I<EXTERNREF, externref>; + +foreach reg = [FUNCREF, EXTERNREF] in { +def : Pat<(select (i32 (setne I32:$cond, 0)), reg:$lhs, reg:$rhs), + (!cast<Instruction>("SELECT_"#reg) reg:$lhs, reg:$rhs, I32:$cond)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), reg:$lhs, reg:$rhs), + (!cast<Instruction>("SELECT_"#reg) reg:$rhs, reg:$lhs, I32:$cond)>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 4f3da2f35c61..9f3d0f4ab2c3 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -16,7 +16,9 @@ multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, - !or(0xfd00, !and(0xff, simdop))>, + !if(!ge(simdop, 0x100), + !or(0xfd0000, !and(0xffff, simdop)), + !or(0xfd00, !and(0xff, simdop)))>, Requires<[HasSIMD128]>; } @@ -35,6 +37,99 @@ def ImmI#SIZE : ImmLeaf<i32, foreach SIZE = [2, 4, 8, 16, 32] in def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; +// Create vector with identical lanes: splat +def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>; +def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>; +def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x, + $x, $x, $x, $x)>; +def splat16 : PatFrag<(ops node:$x), + (build_vector $x, $x, $x, $x, $x, $x, $x, $x, + $x, $x, $x, $x, $x, $x, $x, $x)>; + +class Vec { + ValueType vt; + ValueType int_vt; + ValueType lane_vt; + WebAssemblyRegClass lane_rc; + int lane_bits; + ImmLeaf lane_idx; + PatFrag splat; + string prefix; + Vec split; +} + +def I8x16 : Vec { + let vt = v16i8; + let int_vt = vt; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 8; + let lane_idx = LaneIdx16; + let splat = splat16; + let prefix = "i8x16"; +} + +def I16x8 : Vec { + let vt = v8i16; + let int_vt = vt; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 16; + let lane_idx = LaneIdx8; + let splat = splat8; + let prefix = "i16x8"; + let split = I8x16; +} + +def I32x4 : Vec { + let vt = v4i32; + let int_vt = vt; + let lane_vt = i32; + let lane_rc = I32; + let lane_bits = 32; + let lane_idx = LaneIdx4; + let splat = 
splat4; + let prefix = "i32x4"; + let split = I16x8; +} + +def I64x2 : Vec { + let vt = v2i64; + let int_vt = vt; + let lane_vt = i64; + let lane_rc = I64; + let lane_bits = 64; + let lane_idx = LaneIdx2; + let splat = splat2; + let prefix = "i64x2"; + let split = I32x4; +} + +def F32x4 : Vec { + let vt = v4f32; + let int_vt = v4i32; + let lane_vt = f32; + let lane_rc = F32; + let lane_bits = 32; + let lane_idx = LaneIdx4; + let splat = splat4; + let prefix = "f32x4"; +} + +def F64x2 : Vec { + let vt = v2f64; + let int_vt = v2i64; + let lane_vt = f64; + let lane_rc = F64; + let lane_bits = 64; + let lane_idx = LaneIdx2; + let splat = splat2; + let prefix = "f64x2"; +} + +defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; +defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; + //===----------------------------------------------------------------------===// // Load and store //===----------------------------------------------------------------------===// @@ -53,116 +148,186 @@ defm LOAD_V128_A64 : "v128.load\t$off$p2align", 0>; } -// Def load and store patterns from WebAssemblyInstrMemory.td for vector types -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">; -defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">; -defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">; -defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">; -defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">; +// Def load patterns from WebAssemblyInstrMemory.td for vector types +foreach vec = AllVecs in { +defm : LoadPatNoOffset<vec.vt, load, "LOAD_V128">; +defm : LoadPatImmOff<vec.vt, load, regPlusImm, "LOAD_V128">; +defm : LoadPatImmOff<vec.vt, load, or_is_add, "LOAD_V128">; +defm : LoadPatOffsetOnly<vec.vt, load, "LOAD_V128">; +defm : LoadPatGlobalAddrOffOnly<vec.vt, load, "LOAD_V128">; } -// vNxM.load_splat -multiclass SIMDLoadSplat<string vec, bits<32> simdop> { +// v128.loadX_splat +multiclass SIMDLoadSplat<int 
size, bits<32> simdop> { let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_SPLAT_#vec#_A32 : + defm LOAD#size#_SPLAT_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; - defm LOAD_SPLAT_#vec#_A64 : + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; + defm LOAD#size#_SPLAT_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - vec#".load_splat\t$dst, ${off}(${addr})$p2align", - vec#".load_splat\t$off$p2align", simdop>; + "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", + "v128.load"#size#"_splat\t$off$p2align", simdop>; } } -defm "" : SIMDLoadSplat<"v8x16", 7>; -defm "" : SIMDLoadSplat<"v16x8", 8>; -defm "" : SIMDLoadSplat<"v32x4", 9>; -defm "" : SIMDLoadSplat<"v64x2", 10>; +defm "" : SIMDLoadSplat<8, 7>; +defm "" : SIMDLoadSplat<16, 8>; +defm "" : SIMDLoadSplat<32, 9>; +defm "" : SIMDLoadSplat<64, 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], - ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { -defm : LoadPatNoOffset<!cast<ValueType>(args[0]), - load_splat, - "LOAD_SPLAT_"#args[1]>; -defm : LoadPatImmOff<!cast<ValueType>(args[0]), - load_splat, - regPlusImm, - "LOAD_SPLAT_"#args[1]>; -defm : LoadPatImmOff<!cast<ValueType>(args[0]), - load_splat, - or_is_add, - "LOAD_SPLAT_"#args[1]>; -defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]), - load_splat, - "LOAD_SPLAT_"#args[1]>; -defm : 
LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]), - load_splat, - "LOAD_SPLAT_"#args[1]>; +foreach vec = AllVecs in { +defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; +defm : LoadPatNoOffset<vec.vt, load_splat, inst>; +defm : LoadPatImmOff<vec.vt, load_splat, regPlusImm, inst>; +defm : LoadPatImmOff<vec.vt, load_splat, or_is_add, inst>; +defm : LoadPatOffsetOnly<vec.vt, load_splat, inst>; +defm : LoadPatGlobalAddrOffOnly<vec.vt, load_splat, inst>; } // Load and extend -multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> { +multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> { + defvar signed = vec.prefix#".load"#loadPat#"_s"; + defvar unsigned = vec.prefix#".load"#loadPat#"_u"; let mayLoad = 1, UseNamedOperandTable = 1 in { - defm LOAD_EXTEND_S_#vec_t#_A32 : + defm LOAD_EXTEND_S_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - name#"_s\t$dst, ${off}(${addr})$p2align", - name#"_s\t$off$p2align", simdop>; - defm LOAD_EXTEND_U_#vec_t#_A32 : + signed#"\t$dst, ${off}(${addr})$p2align", + signed#"\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec#_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - name#"_u\t$dst, ${off}(${addr})$p2align", - name#"_u\t$off$p2align", !add(simdop, 1)>; - defm LOAD_EXTEND_S_#vec_t#_A64 : + unsigned#"\t$dst, ${off}(${addr})$p2align", + unsigned#"\t$off$p2align", !add(simdop, 1)>; + defm LOAD_EXTEND_S_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - name#"_s\t$dst, ${off}(${addr})$p2align", - name#"_s\t$off$p2align", simdop>; - defm LOAD_EXTEND_U_#vec_t#_A64 : + signed#"\t$dst, ${off}(${addr})$p2align", + signed#"\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec#_A64 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, 
offset64_op:$off, I64:$addr), (outs), (ins P2Align:$p2align, offset64_op:$off), [], - name#"_u\t$dst, ${off}(${addr})$p2align", - name#"_u\t$off$p2align", !add(simdop, 1)>; + unsigned#"\t$dst, ${off}(${addr})$p2align", + unsigned#"\t$off$p2align", !add(simdop, 1)>; } } -defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>; -defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>; -defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>; +defm "" : SIMDLoadExtend<I16x8, "8x8", 1>; +defm "" : SIMDLoadExtend<I32x4, "16x4", 3>; +defm "" : SIMDLoadExtend<I64x2, "32x2", 5>; + +foreach vec = [I16x8, I32x4, I64x2] in +foreach exts = [["sextloadvi", "_S"], + ["zextloadvi", "_U"], + ["extloadvi", "_U"]] in { +defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits); +defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec; +defm : LoadPatNoOffset<vec.vt, loadpat, inst>; +defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>; +defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>; +defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>; +defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>; +} + +// Load lane into zero vector +multiclass SIMDLoadZero<Vec vec, bits<32> simdop> { + defvar name = "v128.load"#vec.lane_bits#"_zero"; + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm LOAD_ZERO_#vec#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"\t$dst, ${off}(${addr})$p2align", + name#"\t$off$p2align", simdop>; + defm LOAD_ZERO_#vec#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + name#"\t$dst, ${off}(${addr})$p2align", + name#"\t$off$p2align", simdop>; + } // mayLoad = 1, UseNamedOperandTable = 1 +} + +// TODO: Also support v4f32 and v2f64 once the instructions are merged +// to the proposal +defm "" : SIMDLoadZero<I32x4, 252>; +defm "" : SIMDLoadZero<I64x2, 253>; + +foreach vec = [I32x4, 
I64x2] in { +defvar loadpat = !cast<Intrinsic>("int_wasm_load"#vec.lane_bits#"_zero"); +defvar inst = "LOAD_ZERO_"#vec; +defm : LoadPatNoOffset<vec.vt, loadpat, inst>; +defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>; +defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>; +defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>; +defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>; +} + +// Load lane +multiclass SIMDLoadLane<Vec vec, bits<32> simdop> { + defvar name = "v128.load"#vec.lane_bits#"_lane"; + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm LOAD_LANE_#vec#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, + I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), + [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx", + name#"\t$off$p2align, $idx", simdop>; + defm LOAD_LANE_#vec#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, + I64:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx), + [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx", + name#"\t$off$p2align, $idx", simdop>; + } // mayLoad = 1, UseNamedOperandTable = 1 +} -foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in -foreach exts = [["sextloadv", "_S"], - ["zextloadv", "_U"], - ["extloadv", "_U"]] in { -defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm, - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add, - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; -defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), - "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +// TODO: Also support v4f32 and v2f64 once the 
instructions are merged +// to the proposal +defm "" : SIMDLoadLane<I8x16, 88>; +defm "" : SIMDLoadLane<I16x8, 89>; +defm "" : SIMDLoadLane<I32x4, 90>; +defm "" : SIMDLoadLane<I64x2, 91>; + +// Select loads with no constant offset. +multiclass LoadLanePatNoOffset<Vec vec, PatFrag kind> { + defvar load_lane_a32 = !cast<NI>("LOAD_LANE_"#vec#"_A32"); + defvar load_lane_a64 = !cast<NI>("LOAD_LANE_"#vec#"_A64"); + def : Pat<(vec.vt (kind (i32 I32:$addr), + (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), + (load_lane_a32 0, 0, imm:$idx, $addr, $vec)>, + Requires<[HasAddr32]>; + def : Pat<(vec.vt (kind (i64 I64:$addr), + (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), + (load_lane_a64 0, 0, imm:$idx, $addr, $vec)>, + Requires<[HasAddr64]>; } +defm : LoadLanePatNoOffset<I8x16, int_wasm_load8_lane>; +defm : LoadLanePatNoOffset<I16x8, int_wasm_load16_lane>; +defm : LoadLanePatNoOffset<I32x4, int_wasm_load32_lane>; +defm : LoadLanePatNoOffset<I64x2, int_wasm_load64_lane>; + +// TODO: Also support the other load patterns for load_lane once the instructions +// are merged to the proposal. 
// Store: v128.store let mayStore = 1, UseNamedOperandTable = 1 in { @@ -177,30 +342,77 @@ defm STORE_V128_A64 : "v128.store\t${off}(${addr})$p2align, $vec", "v128.store\t$off$p2align", 11>; } -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -// Def load and store patterns from WebAssemblyInstrMemory.td for vector types -defm : StorePatNoOffset<vec_t, store, "STORE_V128">; -defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">; -defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">; -defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">; -defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">; + +// Def store patterns from WebAssemblyInstrMemory.td for vector types +foreach vec = AllVecs in { +defm : StorePatNoOffset<vec.vt, store, "STORE_V128">; +defm : StorePatImmOff<vec.vt, store, regPlusImm, "STORE_V128">; +defm : StorePatImmOff<vec.vt, store, or_is_add, "STORE_V128">; +defm : StorePatOffsetOnly<vec.vt, store, "STORE_V128">; +defm : StorePatGlobalAddrOffOnly<vec.vt, store, "STORE_V128">; +} + +// Store lane: v128.store{8,16,32,64}_lane. A store defines no value, so both the A32 and A64 variants have empty outs. +multiclass SIMDStoreLane<Vec vec, bits<32> simdop> { + defvar name = "v128.store"#vec.lane_bits#"_lane"; + let mayStore = 1, UseNamedOperandTable = 1 in { + defm STORE_LANE_#vec#_A32 : + SIMD_I<(outs), + (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, + I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), + [], name#"\t${off}(${addr})$p2align, $vec, $idx", + name#"\t$off$p2align, $idx", simdop>; + defm STORE_LANE_#vec#_A64 : + SIMD_I<(outs), + (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, + I64:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx), + [], name#"\t${off}(${addr})$p2align, $vec, $idx", + name#"\t$off$p2align, $idx", simdop>; + } // mayStore = 1, UseNamedOperandTable = 1 +} + +// TODO: Also support v4f32 and v2f64 once the instructions are merged +// to the proposal +defm "" : SIMDStoreLane<I8x16, 
92>; +defm "" : SIMDStoreLane<I16x8, 93>; +defm "" : SIMDStoreLane<I32x4, 94>; +defm "" : SIMDStoreLane<I64x2, 95>; + +// Select stores with no constant offset. +multiclass StoreLanePatNoOffset<Vec vec, PatFrag kind> { + def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), + (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>, + Requires<[HasAddr32]>; + def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), + (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>, + Requires<[HasAddr64]>; } +defm : StoreLanePatNoOffset<I8x16, int_wasm_store8_lane>; +defm : StoreLanePatNoOffset<I16x8, int_wasm_store16_lane>; +defm : StoreLanePatNoOffset<I32x4, int_wasm_store32_lane>; +defm : StoreLanePatNoOffset<I64x2, int_wasm_store64_lane>; + +// TODO: Also support the other store patterns for store_lane once the +// instructions are merged to the proposal. + //===----------------------------------------------------------------------===// // Constructing SIMD values //===----------------------------------------------------------------------===// // Constant: v128.const -multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> { +multiclass ConstVec<Vec vec, dag ops, dag pat, string args> { let isMoveImm = 1, isReMaterializable = 1, Predicates = [HasUnimplementedSIMD128] in - defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, - [(set V128:$dst, (vec_t pat))], - "v128.const\t$dst, "#args, - "v128.const\t"#args, 12>; + defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops, + [(set V128:$dst, (vec.vt pat))], + "v128.const\t$dst, "#args, + "v128.const\t"#args, 12>; } -defm "" : ConstVec<v16i8, +defm "" : ConstVec<I8x16, (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1, vec_i8imm_op:$i2, vec_i8imm_op:$i3, vec_i8imm_op:$i4, vec_i8imm_op:$i5, @@ -215,7 +427,7 @@ defm "" : ConstVec<v16i8, ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF), !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, 
", "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>; -defm "" : ConstVec<v8i16, +defm "" : ConstVec<I16x8, (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1, vec_i16imm_op:$i2, vec_i16imm_op:$i3, vec_i16imm_op:$i4, vec_i16imm_op:$i5, @@ -225,23 +437,23 @@ defm "" : ConstVec<v8i16, ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7), "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">; let IsCanonical = 1 in -defm "" : ConstVec<v4i32, +defm "" : ConstVec<I32x4, (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1, vec_i32imm_op:$i2, vec_i32imm_op:$i3), (build_vector (i32 imm:$i0), (i32 imm:$i1), (i32 imm:$i2), (i32 imm:$i3)), "$i0, $i1, $i2, $i3">; -defm "" : ConstVec<v2i64, +defm "" : ConstVec<I64x2, (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1), (build_vector (i64 imm:$i0), (i64 imm:$i1)), "$i0, $i1">; -defm "" : ConstVec<v4f32, +defm "" : ConstVec<F32x4, (ins f32imm_op:$i0, f32imm_op:$i1, f32imm_op:$i2, f32imm_op:$i3), (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1), (f32 fpimm:$i2), (f32 fpimm:$i3)), "$i0, $i1, $i2, $i3">; -defm "" : ConstVec<v2f64, +defm "" : ConstVec<F64x2, (ins f64imm_op:$i0, f64imm_op:$i1), (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)), "$i0, $i1">; @@ -269,10 +481,10 @@ defm SHUFFLE : vec_i8imm_op:$mC, vec_i8imm_op:$mD, vec_i8imm_op:$mE, vec_i8imm_op:$mF), [], - "v8x16.shuffle\t$dst, $x, $y, "# + "i8x16.shuffle\t$dst, $x, $y, "# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", - "v8x16.shuffle\t"# + "i8x16.shuffle\t"# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 13>; @@ -280,8 +492,8 @@ defm SHUFFLE : // Shuffles after custom lowering def wasm_shuffle_t : SDTypeProfile<1, 18, []>; def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>; -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), +foreach vec = AllVecs in { +def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y), (i32 LaneIdx32:$m0), (i32 
LaneIdx32:$m1), (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3), (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5), @@ -290,178 +502,150 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB), (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))), - (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y), - (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1), - (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3), - (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5), - (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7), - (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9), - (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB), - (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD), - (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; + (SHUFFLE $x, $y, + imm:$m0, imm:$m1, imm:$m2, imm:$m3, + imm:$m4, imm:$m5, imm:$m6, imm:$m7, + imm:$m8, imm:$m9, imm:$mA, imm:$mB, + imm:$mC, imm:$mD, imm:$mE, imm:$mF)>; } -// Swizzle lanes: v8x16.swizzle +// Swizzle lanes: i8x16.swizzle def wasm_swizzle_t : SDTypeProfile<1, 2, []>; def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; + "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), - (SWIZZLE V128:$src, V128:$mask)>; + (SWIZZLE $src, $mask)>; + +multiclass Splat<Vec vec, bits<32> simdop> { + defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x), + (outs), (ins), + [(set (vec.vt V128:$dst), + (vec.splat vec.lane_rc:$x))], + vec.prefix#".splat\t$dst, $x", vec.prefix#".splat", + simdop>; +} -// Create vector with identical lanes: splat -def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>; -def splat4 : PatFrag<(ops node:$x), (build_vector - node:$x, node:$x, node:$x, node:$x)>; -def splat8 : PatFrag<(ops 
node:$x), (build_vector - node:$x, node:$x, node:$x, node:$x, - node:$x, node:$x, node:$x, node:$x)>; -def splat16 : PatFrag<(ops node:$x), (build_vector - node:$x, node:$x, node:$x, node:$x, - node:$x, node:$x, node:$x, node:$x, - node:$x, node:$x, node:$x, node:$x, - node:$x, node:$x, node:$x, node:$x)>; - -multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t, - PatFrag splat_pat, bits<32> simdop> { - defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins), - [(set (vec_t V128:$dst), (splat_pat reg_t:$x))], - vec#".splat\t$dst, $x", vec#".splat", simdop>; -} - -defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>; -defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>; -defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>; -defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>; -defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>; -defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>; +defm "" : Splat<I8x16, 15>; +defm "" : Splat<I16x8, 16>; +defm "" : Splat<I32x4, 17>; +defm "" : Splat<I64x2, 18>; +defm "" : Splat<F32x4, 19>; +defm "" : Splat<F64x2, 20>; // scalar_to_vector leaves high lanes undefined, so can be a splat -class ScalarSplatPat<ValueType vec_t, ValueType lane_t, - WebAssemblyRegClass reg_t> : - Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))), - (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>; - -def : ScalarSplatPat<v16i8, i32, I32>; -def : ScalarSplatPat<v8i16, i32, I32>; -def : ScalarSplatPat<v4i32, i32, I32>; -def : ScalarSplatPat<v2i64, i64, I64>; -def : ScalarSplatPat<v4f32, f32, F32>; -def : ScalarSplatPat<v2f64, f64, F64>; +foreach vec = AllVecs in +def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))), + (!cast<Instruction>("SPLAT_"#vec) $x)>; //===----------------------------------------------------------------------===// // Accessing lanes //===----------------------------------------------------------------------===// // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u -multiclass 
ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t, - bits<32> simdop, string suffix = ""> { - defm EXTRACT_LANE_#vec_t#suffix : - SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx), +multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> { + defm EXTRACT_LANE_#vec#suffix : + SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx), (outs), (ins vec_i8imm_op:$idx), [], - vec#".extract_lane"#suffix#"\t$dst, $vec, $idx", - vec#".extract_lane"#suffix#"\t$idx", simdop>; + vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx", + vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>; } -defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">; -defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">; -defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">; -defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">; -defm "" : ExtractLane<v4i32, "i32x4", I32, 27>; -defm "" : ExtractLane<v2i64, "i64x2", I64, 29>; -defm "" : ExtractLane<v4f32, "f32x4", F32, 31>; -defm "" : ExtractLane<v2f64, "f64x2", F64, 33>; +defm "" : ExtractLane<I8x16, 21, "_s">; +defm "" : ExtractLane<I8x16, 22, "_u">; +defm "" : ExtractLane<I16x8, 24, "_s">; +defm "" : ExtractLane<I16x8, 25, "_u">; +defm "" : ExtractLane<I32x4, 27>; +defm "" : ExtractLane<I64x2, 29>; +defm "" : ExtractLane<F32x4, 31>; +defm "" : ExtractLane<F64x2, 33>; def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), - (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>; def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), - (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>; def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)), - (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I32x4 $vec, imm:$idx)>; def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)), - (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>; + (EXTRACT_LANE_F32x4 $vec, imm:$idx)>; def : Pat<(vector_extract 
(v2i64 V128:$vec), (i32 LaneIdx2:$idx)), - (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I64x2 $vec, imm:$idx)>; def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)), - (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>; + (EXTRACT_LANE_F64x2 $vec, imm:$idx)>; def : Pat< (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8), - (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>; def : Pat< (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)), - (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>; def : Pat< (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16), - (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>; def : Pat< (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)), - (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; + (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>; // Replace lane value: replace_lane -multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t, - WebAssemblyRegClass reg_t, ValueType lane_t, - bits<32> simdop> { - defm REPLACE_LANE_#vec_t : - SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x), - (outs), (ins vec_i8imm_op:$idx), - [(set V128:$dst, (vector_insert - (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))], - vec#".replace_lane\t$dst, $vec, $idx, $x", - vec#".replace_lane\t$idx", simdop>; -} - -defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>; -defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>; -defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>; -defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>; -defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>; -defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>; +multiclass ReplaceLane<Vec vec, bits<32> simdop> { + defm REPLACE_LANE_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, 
vec.lane_rc:$x), + (outs), (ins vec_i8imm_op:$idx), + [(set V128:$dst, (vector_insert + (vec.vt V128:$vec), + (vec.lane_vt vec.lane_rc:$x), + (i32 vec.lane_idx:$idx)))], + vec.prefix#".replace_lane\t$dst, $vec, $idx, $x", + vec.prefix#".replace_lane\t$idx", simdop>; +} + +defm "" : ReplaceLane<I8x16, 23>; +defm "" : ReplaceLane<I16x8, 26>; +defm "" : ReplaceLane<I32x4, 28>; +defm "" : ReplaceLane<I64x2, 30>; +defm "" : ReplaceLane<F32x4, 32>; +defm "" : ReplaceLane<F64x2, 34>; // Lower undef lane indices to zero def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), - (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>; + (REPLACE_LANE_I8x16 $vec, 0, $x)>; def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef), - (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>; + (REPLACE_LANE_I16x8 $vec, 0, $x)>; def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef), - (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>; + (REPLACE_LANE_I32x4 $vec, 0, $x)>; def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef), - (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>; + (REPLACE_LANE_I64x2 $vec, 0, $x)>; def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef), - (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>; + (REPLACE_LANE_F32x4 $vec, 0, $x)>; def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef), - (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>; + (REPLACE_LANE_F64x2 $vec, 0, $x)>; //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// -multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec, - string name, CondCode cond, bits<32> simdop> { - defm _#vec_t : +multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop> { + defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), - [(set (out_t V128:$dst), - (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond) - )], - vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, 
simdop>; + [(set (vec.int_vt V128:$dst), + (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))], + vec.prefix#"."#name#"\t$dst, $lhs, $rhs", + vec.prefix#"."#name, simdop>; } multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> { - defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>; - defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond, - !add(baseInst, 10)>; - defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond, - !add(baseInst, 20)>; + defm "" : SIMDCondition<I8x16, name, cond, baseInst>; + defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>; + defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>; } multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> { - defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>; - defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond, - !add(baseInst, 6)>; + defm "" : SIMDCondition<F32x4, name, cond, baseInst>; + defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>; } // Equality: eq @@ -499,108 +683,157 @@ defm GE : SIMDConditionFP<"ge", SETOGE, 70>; // Lower float comparisons that don't care about NaN to standard WebAssembly // float comparisons. These instructions are generated with nnan and in the // target-independent expansion of unordered comparisons and ordered ne. 
-foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32], - [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in +foreach nodes = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4], + [setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))), - (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>; + (nodes[1] $lhs, $rhs)>; -foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64], - [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in +foreach nodes = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2], + [setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))), - (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>; + (nodes[1] $lhs, $rhs)>; + +// Prototype i64x2.eq +defm EQ_v2i64 : + SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), + [(set (v2i64 V128:$dst), + (int_wasm_eq (v2i64 V128:$lhs), (v2i64 V128:$rhs)))], + "i64x2.eq\t$dst, $lhs, $rhs", "i64x2.eq", 192>; //===----------------------------------------------------------------------===// // Bitwise operations //===----------------------------------------------------------------------===// -multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name, - bits<32> simdop> { - defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), - (outs), (ins), - [(set (vec_t V128:$dst), - (node (vec_t V128:$lhs), (vec_t V128:$rhs)) - )], - vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, - simdop>; +multiclass SIMDBinary<Vec vec, SDNode node, string name, bits<32> simdop> { + defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), + [(set (vec.vt V128:$dst), + (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], + vec.prefix#"."#name#"\t$dst, $lhs, $rhs", + vec.prefix#"."#name, simdop>; } -multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> { - 
defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>; - defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>; - defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>; - defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>; +multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop, bit commutable = false> { + let isCommutable = commutable in + defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), [], + "v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>; + foreach vec = IntVecs in + def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)), + (!cast<NI>(NAME) $lhs, $rhs)>; } -multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name, - bits<32> simdop> { - defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), - [(set (vec_t V128:$dst), - (vec_t (node (vec_t V128:$vec))) - )], - vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>; +multiclass SIMDUnary<Vec vec, SDNode node, string name, bits<32> simdop> { + defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), + [(set (vec.vt V128:$dst), + (vec.vt (node (vec.vt V128:$v))))], + vec.prefix#"."#name#"\t$dst, $v", + vec.prefix#"."#name, simdop>; } // Bitwise logic: v128.not -foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in -defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>; +defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [], + "v128.not\t$dst, $v", "v128.not", 77>; +foreach vec = IntVecs in +def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>; // Bitwise logic: v128.and / v128.or / v128.xor -let isCommutable = 1 in { -defm AND : SIMDBitwise<and, "and", 78>; -defm OR : SIMDBitwise<or, "or", 80>; -defm XOR : SIMDBitwise<xor, "xor", 81>; -} // isCommutable = 1 +defm AND : SIMDBitwise<and, "and", 78, true>; +defm OR : SIMDBitwise<or, "or", 80, true>; +defm XOR : SIMDBitwise<xor, "xor", 81, true>; // Bitwise logic: v128.andnot def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; defm ANDNOT 
: SIMDBitwise<andnot, "andnot", 79>; // Bitwise select: v128.bitselect -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in - defm BITSELECT_#vec_t : - SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), - [(set (vec_t V128:$dst), - (vec_t (int_wasm_bitselect - (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) - )) - )], - "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; +defm BITSELECT : + SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [], + "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; + +foreach vec = AllVecs in +def : Pat<(vec.vt (int_wasm_bitselect + (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))), + (BITSELECT $v1, $v2, $c)>; // Bitselect is equivalent to (c & v1) | (~c & v2) -foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in - def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)), - (and (vnot V128:$c), (vec_t V128:$v2)))), - (!cast<Instruction>("BITSELECT_"#vec_t) - V128:$v1, V128:$v2, V128:$c)>; +foreach vec = IntVecs in +def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)), + (and (vnot V128:$c), (vec.vt V128:$v2)))), + (BITSELECT $v1, $v2, $c)>; + +// Also implement vselect in terms of bitselect +foreach vec = AllVecs in +def : Pat<(vec.vt (vselect + (vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))), + (BITSELECT $v1, $v2, $c)>; + +// MVP select on v128 values +defm SELECT_V128 : + I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [], + "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>; + +foreach vec = AllVecs in { +def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)), + (SELECT_V128 $lhs, $rhs, $cond)>; + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. 
+def : Pat<(select + (i32 (setne I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)), + (SELECT_V128 $lhs, $rhs, $cond)>; + +// And again, this time with seteq instead of setne and the arms reversed. +def : Pat<(select + (i32 (seteq I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)), + (SELECT_V128 $rhs, $lhs, $cond)>; +} // foreach vec + +// Sign select +multiclass SIMDSignSelect<Vec vec, bits<32> simdop> { + defm SIGNSELECT_#vec : + SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), + [(set (vec.vt V128:$dst), + (vec.vt (int_wasm_signselect + (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))))], + vec.prefix#".signselect\t$dst, $v1, $v2, $c", + vec.prefix#".signselect", simdop>; +} + +defm : SIMDSignSelect<I8x16, 125>; +defm : SIMDSignSelect<I16x8, 126>; +defm : SIMDSignSelect<I32x4, 127>; +defm : SIMDSignSelect<I64x2, 148>; //===----------------------------------------------------------------------===// // Integer unary arithmetic //===----------------------------------------------------------------------===// multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>; - defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; - defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; - defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; + defm "" : SIMDUnary<I8x16, node, name, baseInst>; + defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>; + defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>; + defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>; } -multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name, - bits<32> simdop> { - defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), - [(set I32:$dst, (i32 (op (vec_t V128:$vec))))], - vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>; +multiclass SIMDReduceVec<Vec vec, SDNode op, string name, bits<32> 
simdop> { + defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), + [(set I32:$dst, (i32 (op (vec.vt V128:$vec))))], + vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, + simdop>; } multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> { - defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>; - defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>; - defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>; - defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>; + defm "" : SIMDReduceVec<I8x16, op, name, baseInst>; + defm "" : SIMDReduceVec<I16x8, op, name, !add(baseInst, 32)>; + defm "" : SIMDReduceVec<I32x4, op, name, !add(baseInst, 64)>; + defm "" : SIMDReduceVec<I64x2, op, name, !add(baseInst, 96)>; } // Integer vector negation -def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; +def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>; // Integer absolute value: abs defm ABS : SIMDUnaryInt<abs, "abs", 96>; @@ -614,64 +847,56 @@ defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>; // All lanes true: all_true defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>; +// Population count: popcnt +defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 124>; + // Reductions already return 0 or 1, so and 1, setne 0, and seteq 1 // can be folded out foreach reduction = [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in -foreach ty = [v16i8, v8i16, v4i32, v2i64] in { -def : Pat<(i32 (and - (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), - (i32 1) - )), - (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; -def : Pat<(i32 (setne - (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), - (i32 0) - )), - (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; -def : Pat<(i32 (seteq - (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), - (i32 1) - )), - (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; -} - -multiclass 
SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> { - defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), - [(set I32:$dst, - (i32 (int_wasm_bitmask (vec_t V128:$vec))) - )], - vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>; -} - -defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>; -defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>; -defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>; +foreach vec = IntVecs in { +defvar intrinsic = !cast<Intrinsic>(reduction[0]); +defvar inst = !cast<NI>(reduction[1]#"_"#vec); +def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>; +def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>; +def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>; +} + +multiclass SIMDBitmask<Vec vec, bits<32> simdop> { + defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), + [(set I32:$dst, + (i32 (int_wasm_bitmask (vec.vt V128:$vec))))], + vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask", + simdop>; +} + +defm BITMASK : SIMDBitmask<I8x16, 100>; +defm BITMASK : SIMDBitmask<I16x8, 132>; +defm BITMASK : SIMDBitmask<I32x4, 164>; +defm BITMASK : SIMDBitmask<I64x2, 196>; //===----------------------------------------------------------------------===// // Bit shifts //===----------------------------------------------------------------------===// -multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name, - bits<32> simdop> { - defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), - (outs), (ins), - [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))], - vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>; +multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> { + defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins), + [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))], + vec.prefix#"."#name#"\t$dst, $vec, $x", + vec.prefix#"."#name, simdop>; } multiclass 
SIMDShiftInt<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>; - defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>; - defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>; - defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>; + defm "" : SIMDShift<I8x16, node, name, baseInst>; + defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>; + defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>; + defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>; } // WebAssembly SIMD shifts are nonstandard in that the shift amount is // an i32 rather than a vector, so they need custom nodes. -def wasm_shift_t : SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>] ->; +def wasm_shift_t : + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>; def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>; def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>; @@ -688,24 +913,24 @@ defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>; //===----------------------------------------------------------------------===// multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; - defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; - defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; + defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>; + defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>; + defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>; } multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>; - defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; + defm "" : SIMDBinary<I8x16, node, name, 
baseInst>; + defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>; } multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinaryIntSmall<node, name, baseInst>; - defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; + defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>; } multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>; - defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; + defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>; } // Integer addition: add / add_saturate_s / add_saturate_u @@ -736,38 +961,74 @@ defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>; // Integer unsigned rounding average: avgr_u let isCommutable = 1 in { -defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>; -defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>; +defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>; +defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>; } -def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), - (add node:$lhs, node:$rhs), +def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs), "return N->getFlags().hasNoUnsignedWrap();">; -foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in +foreach vec = [I8x16, I16x8] in { +defvar inst = !cast<NI>("AVGR_U_"#vec); def : Pat<(wasm_shr_u (add_nuw - (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)), - (nodes[1] (i32 1)) - ), - (i32 1) - ), - (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>; + (add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)), + (vec.splat (i32 1))), + (i32 1)), + (inst $lhs, $rhs)>; +} // Widening dot product: i32x4.dot_i16x8_s let isCommutable = 1 in defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))], "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", 
"i32x4.dot_i16x8_s", - 180>; + 186>; + +// Extending multiplication: extmul_{low,high}_P, extmul_high +multiclass SIMDExtBinary<Vec vec, SDNode node, string name, bits<32> simdop> { + defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), + [(set (vec.vt V128:$dst), (node + (vec.split.vt V128:$lhs),(vec.split.vt V128:$rhs)))], + vec.prefix#"."#name#"\t$dst, $lhs, $rhs", + vec.prefix#"."#name, simdop>; +} + +defm EXTMUL_LOW_S : + SIMDExtBinary<I16x8, int_wasm_extmul_low_signed, "extmul_low_i8x16_s", 154>; +defm EXTMUL_HIGH_S : + SIMDExtBinary<I16x8, int_wasm_extmul_high_signed, "extmul_high_i8x16_s", 157>; +defm EXTMUL_LOW_U : + SIMDExtBinary<I16x8, int_wasm_extmul_low_unsigned, "extmul_low_i8x16_u", 158>; +defm EXTMUL_HIGH_U : + SIMDExtBinary<I16x8, int_wasm_extmul_high_unsigned, "extmul_high_i8x16_u", 159>; + +defm EXTMUL_LOW_S : + SIMDExtBinary<I32x4, int_wasm_extmul_low_signed, "extmul_low_i16x8_s", 187>; +defm EXTMUL_HIGH_S : + SIMDExtBinary<I32x4, int_wasm_extmul_high_signed, "extmul_high_i16x8_s", 189>; +defm EXTMUL_LOW_U : + SIMDExtBinary<I32x4, int_wasm_extmul_low_unsigned, "extmul_low_i16x8_u", 190>; +defm EXTMUL_HIGH_U : + SIMDExtBinary<I32x4, int_wasm_extmul_high_unsigned, "extmul_high_i16x8_u", 191>; + +defm EXTMUL_LOW_S : + SIMDExtBinary<I64x2, int_wasm_extmul_low_signed, "extmul_low_i32x4_s", 210>; +defm EXTMUL_HIGH_S : + SIMDExtBinary<I64x2, int_wasm_extmul_high_signed, "extmul_high_i32x4_s", 211>; +defm EXTMUL_LOW_U : + SIMDExtBinary<I64x2, int_wasm_extmul_low_unsigned, "extmul_low_i32x4_u", 214>; +defm EXTMUL_HIGH_U : + SIMDExtBinary<I64x2, int_wasm_extmul_high_unsigned, "extmul_high_i32x4_u", 215>; //===----------------------------------------------------------------------===// // Floating-point unary arithmetic //===----------------------------------------------------------------------===// multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>; 
- defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>; + defm "" : SIMDUnary<F32x4, node, name, baseInst>; + defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>; } // Absolute value: abs @@ -780,22 +1041,22 @@ defm NEG : SIMDUnaryFP<fneg, "neg", 225>; defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>; // Rounding: ceil, floor, trunc, nearest -defm CEIL : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>; -defm FLOOR : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>; -defm TRUNC: SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>; -defm NEAREST: SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>; -defm CEIL : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>; -defm FLOOR : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>; -defm TRUNC: SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>; -defm NEAREST: SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>; +defm CEIL : SIMDUnary<F32x4, int_wasm_ceil, "ceil", 216>; +defm FLOOR : SIMDUnary<F32x4, int_wasm_floor, "floor", 217>; +defm TRUNC: SIMDUnary<F32x4, int_wasm_trunc, "trunc", 218>; +defm NEAREST: SIMDUnary<F32x4, int_wasm_nearest, "nearest", 219>; +defm CEIL : SIMDUnary<F64x2, int_wasm_ceil, "ceil", 220>; +defm FLOOR : SIMDUnary<F64x2, int_wasm_floor, "floor", 221>; +defm TRUNC: SIMDUnary<F64x2, int_wasm_trunc, "trunc", 222>; +defm NEAREST: SIMDUnary<F64x2, int_wasm_nearest, "nearest", 223>; //===----------------------------------------------------------------------===// // Floating-point binary arithmetic //===----------------------------------------------------------------------===// multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>; - defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>; + defm "" : SIMDBinary<F32x4, node, name, baseInst>; + defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>; } // Addition: add @@ -828,63 +1089,151 @@ defm 
PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>; // Conversions //===----------------------------------------------------------------------===// -multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op, - string name, bits<32> simdop> { - defm op#_#vec_t#_#arg_t : +multiclass SIMDConvert<Vec vec, Vec arg, SDNode op, string name, + bits<32> simdop> { + defm op#_#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), - [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))], - name#"\t$dst, $vec", name, simdop>; + [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], + vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; } // Floating point to integer with saturation: trunc_sat -defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>; -defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>; +defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>; +defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>; // Integer to floating point: convert -defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>; -defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>; +defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>; +defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>; + +// Lower llvm.wasm.trunc.saturate.* to saturating instructions +def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), + (fp_to_sint_I32x4 $src)>; +def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), + (fp_to_uint_I32x4 $src)>; // Widening operations -multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg, - bits<32> baseInst> { - defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed, - vec#".widen_low_"#arg#"_s", baseInst>; - defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed, - vec#".widen_high_"#arg#"_s", !add(baseInst, 
1)>; - defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned, - vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>; - defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned, - vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>; +def widen_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def widen_low_s : SDNode<"WebAssemblyISD::WIDEN_LOW_S", widen_t>; +def widen_high_s : SDNode<"WebAssemblyISD::WIDEN_HIGH_S", widen_t>; +def widen_low_u : SDNode<"WebAssemblyISD::WIDEN_LOW_U", widen_t>; +def widen_high_u : SDNode<"WebAssemblyISD::WIDEN_HIGH_U", widen_t>; + +// TODO: refactor this to be uniform for i64x2 if the numbering is not changed. +multiclass SIMDWiden<Vec vec, bits<32> baseInst> { + defm "" : SIMDConvert<vec, vec.split, widen_low_s, + "widen_low_"#vec.split.prefix#"_s", baseInst>; + defm "" : SIMDConvert<vec, vec.split, widen_high_s, + "widen_high_"#vec.split.prefix#"_s", !add(baseInst, 1)>; + defm "" : SIMDConvert<vec, vec.split, widen_low_u, + "widen_low_"#vec.split.prefix#"_u", !add(baseInst, 2)>; + defm "" : SIMDConvert<vec, vec.split, widen_high_u, + "widen_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>; } -defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>; -defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>; +defm "" : SIMDWiden<I16x8, 135>; +defm "" : SIMDWiden<I32x4, 167>; + +defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_low_signed, + "widen_low_i32x4_s", 199>; +defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_high_signed, + "widen_high_i32x4_s", 200>; +defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_low_unsigned, + "widen_low_i32x4_u", 201>; +defm "" : SIMDConvert<I64x2, I32x4, int_wasm_widen_high_unsigned, + "widen_high_i32x4_u", 202>; // Narrowing operations -multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg, - bits<32> baseInst> { - defm NARROW_S_#vec_t : +multiclass SIMDNarrow<Vec vec, bits<32> baseInst> { + defvar name = vec.split.prefix#".narrow_"#vec.prefix; + defm 
NARROW_S_#vec.split : SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), - [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed - (arg_t V128:$low), (arg_t V128:$high))))], - vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s", - baseInst>; - defm NARROW_U_#vec_t : + [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_signed + (vec.vt V128:$low), (vec.vt V128:$high))))], + name#"_s\t$dst, $low, $high", name#"_s", baseInst>; + defm NARROW_U_#vec.split : SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), - [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned - (arg_t V128:$low), (arg_t V128:$high))))], - vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u", - !add(baseInst, 1)>; + [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_unsigned + (vec.vt V128:$low), (vec.vt V128:$high))))], + name#"_u\t$dst, $low, $high", name#"_u", !add(baseInst, 1)>; } -defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>; -defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>; +defm "" : SIMDNarrow<I16x8, 101>; +defm "" : SIMDNarrow<I32x4, 133>; + +// Use narrowing operations for truncating stores. Since the narrowing +// operations are saturating instead of truncating, we need to mask +// the stored values first. 
+// TODO: Use consts instead of splats +def store_v8i8_trunc_v8i16 : + OutPatFrag<(ops node:$val), + (EXTRACT_LANE_I64x2 + (NARROW_U_I8x16 + (AND (SPLAT_I32x4 (CONST_I32 0x00ff00ff)), node:$val), + $val), // Unused input + 0)>; + +def store_v4i16_trunc_v4i32 : + OutPatFrag<(ops node:$val), + (EXTRACT_LANE_I64x2 + (NARROW_U_I16x8 + (AND (SPLAT_I32x4 (CONST_I32 0x0000ffff)), node:$val), + $val), // Unused input + 0)>; + +// Store patterns adapted from WebAssemblyInstrMemory.td +multiclass NarrowingStorePatNoOffset<Vec vec, OutPatFrag out> { + defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits); + def : Pat<(node vec.vt:$val, I32:$addr), + (STORE_I64_A32 0, 0, $addr, (out $val))>, + Requires<[HasAddr32]>; + def : Pat<(node vec.vt:$val, I64:$addr), + (STORE_I64_A64 0, 0, $addr, (out $val))>, + Requires<[HasAddr64]>; +} -// Lower llvm.wasm.trunc.saturate.* to saturating instructions -def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), - (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; -def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), - (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>; +defm : NarrowingStorePatNoOffset<I16x8, store_v8i8_trunc_v8i16>; +defm : NarrowingStorePatNoOffset<I32x4, store_v4i16_trunc_v4i32>; + +multiclass NarrowingStorePatImmOff<Vec vec, PatFrag operand, OutPatFrag out> { + defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits); + def : Pat<(node vec.vt:$val, (operand I32:$addr, imm:$off)), + (STORE_I64_A32 0, imm:$off, $addr, (out $val))>, + Requires<[HasAddr32]>; + def : Pat<(node vec.vt:$val, (operand I64:$addr, imm:$off)), + (STORE_I64_A64 0, imm:$off, $addr, (out $val))>, + Requires<[HasAddr64]>; +} + +defm : NarrowingStorePatImmOff<I16x8, regPlusImm, store_v8i8_trunc_v8i16>; +defm : NarrowingStorePatImmOff<I32x4, regPlusImm, store_v4i16_trunc_v4i32>; +defm : NarrowingStorePatImmOff<I16x8, or_is_add, store_v8i8_trunc_v8i16>; +defm : NarrowingStorePatImmOff<I32x4, or_is_add, 
store_v4i16_trunc_v4i32>; + +multiclass NarrowingStorePatOffsetOnly<Vec vec, OutPatFrag out> { + defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits); + def : Pat<(node vec.vt:$val, imm:$off), + (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (out $val))>, + Requires<[HasAddr32]>; + def : Pat<(node vec.vt:$val, imm:$off), + (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (out $val))>, + Requires<[HasAddr64]>; +} + +defm : NarrowingStorePatOffsetOnly<I16x8, store_v8i8_trunc_v8i16>; +defm : NarrowingStorePatOffsetOnly<I32x4, store_v4i16_trunc_v4i32>; + +multiclass NarrowingStorePatGlobalAddrOffOnly<Vec vec, OutPatFrag out> { + defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits); + def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64_A32 0, tglobaladdr:$off, (CONST_I32 0), (out $val))>, + Requires<[IsNotPIC, HasAddr32]>; + def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64_A64 0, tglobaladdr:$off, (CONST_I64 0), (out $val))>, + Requires<[IsNotPIC, HasAddr64]>; +} + +defm : NarrowingStorePatGlobalAddrOffOnly<I16x8, store_v8i8_trunc_v8i16>; +defm : NarrowingStorePatGlobalAddrOffOnly<I32x4, store_v4i16_trunc_v4i32>; // Bitcasts are nops // Matching bitcast t1 to t1 causes strange errors, so avoid repeating types @@ -897,24 +1246,96 @@ foreach t2 = !foldl( ) in def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; +// Extended pairwise addition +defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed, + "extadd_pairwise_i8x16_s", 0xc2>; +defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned, + "extadd_pairwise_i8x16_u", 0xc3>; +defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed, + "extadd_pairwise_i16x8_s", 0xa5>; +defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned, + "extadd_pairwise_i16x8_u", 0xa6>; + + +// Prototype f64x2 conversions +defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed, + "convert_low_i32x4_s", 0x53>; 
+defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned, + "convert_low_i32x4_u", 0x54>; +defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_saturate_zero_signed, + "trunc_sat_zero_f64x2_s", 0x55>; +defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_saturate_zero_unsigned, + "trunc_sat_zero_f64x2_u", 0x56>; +defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero, + "demote_zero_f64x2", 0x57>; +defm "" : SIMDConvert<F64x2, F32x4, int_wasm_promote_low, + "promote_low_f32x4", 0x69>; + //===----------------------------------------------------------------------===// // Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) //===----------------------------------------------------------------------===// -multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> { - defm QFMA_#vec_t : +multiclass SIMDQFM<Vec vec, bits<32> simdopA, bits<32> simdopS> { + defm QFMA_#vec : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), - [(set (vec_t V128:$dst), - (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], - vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; - defm QFMS_#vec_t : + [(set (vec.vt V128:$dst), (int_wasm_qfma + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".qfma\t$dst, $a, $b, $c", vec.prefix#".qfma", simdopA>; + defm QFMS_#vec : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), - [(set (vec_t V128:$dst), - (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], - vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; + [(set (vec.vt V128:$dst), (int_wasm_qfms + (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], + vec.prefix#".qfms\t$dst, $a, $b, $c", vec.prefix#".qfms", simdopS>; } -defm "" : SIMDQFM<v4f32, "f32x4", 252>; -defm "" : SIMDQFM<v2f64, "f64x2", 254>; +defm "" : SIMDQFM<F32x4, 180, 212>; +defm "" : SIMDQFM<F64x2, 254, 255>; + +//===----------------------------------------------------------------------===// +// Saturating 
Rounding Q-Format Multiplication +//===----------------------------------------------------------------------===// + +defm Q15MULR_SAT_S : + SIMDBinary<I16x8, int_wasm_q15mulr_saturate_signed, "q15mulr_sat_s", 156>; + +//===----------------------------------------------------------------------===// +// Experimental prefetch instructions: prefetch.t, prefetch.nt +//===----------------------------------------------------------------------===// + +let mayLoad = true, UseNamedOperandTable = true in { +defm PREFETCH_T_A32 : + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "prefetch.t\t${off}(${addr})$p2align", + "prefetch.t\t$off$p2align", 0xc5>; +defm PREFETCH_T_A64 : + SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "prefetch.t\t${off}(${addr})$p2align", + "prefetch.t\t$off$p2align", 0xc5>; +defm PREFETCH_NT_A32 : + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "prefetch.nt\t${off}(${addr})$p2align", + "prefetch.nt\t$off$p2align", 0xc6>; +defm PREFETCH_NT_A64 : + SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "prefetch.nt\t${off}(${addr})$p2align", + "prefetch.nt\t$off$p2align", 0xc6>; +} // mayLoad, UseNamedOperandTable + +multiclass PrefetchPatNoOffset<PatFrag kind, string inst> { + def : Pat<(kind I32:$addr), (!cast<NI>(inst # "_A32") 0, 0, $addr)>, + Requires<[HasAddr32]>; + def : Pat<(kind I64:$addr), (!cast<NI>(inst # "_A64") 0, 0, $addr)>, + Requires<[HasAddr64]>; +} + +foreach inst = [["PREFETCH_T", "int_wasm_prefetch_t"], + ["PREFETCH_NT", "int_wasm_prefetch_nt"]] in { +defvar node = !cast<Intrinsic>(inst[1]); +defm : PrefetchPatNoOffset<node, inst[0]>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td new file mode 100644 index 000000000000..97638c3494ae --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td @@ -0,0 +1,64 @@ +// WebAssemblyInstrTable.td - WebAssembly Table codegen support -*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Table operand code-gen constructs. +/// Instructions that handle tables +//===----------------------------------------------------------------------===// + + +multiclass TABLE<WebAssemblyRegClass rt> { + defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table), + (outs), (ins table32_op:$table), + [], + "table.get\t$res, $table", + "table.get\t$table", + 0x25>; + + defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, rt:$val, I32:$i), + (outs), (ins table32_op:$table), + [], + "table.set\t$table, $val, $i", + "table.set\t$table", + 0x26>; + + defm TABLE_GROW_#rt : I<(outs I32:$sz), (ins table32_op:$table, I32:$n, rt:$val), + (outs), (ins table32_op:$table), + [], + "table.grow\t$sz, $table, $n, $val", + "table.grow\t$table", + 0xfc0f>; + + defm TABLE_FILL_#rt : I<(outs), (ins table32_op:$table, I32:$n, rt:$val, I32:$i), + (outs), (ins table32_op:$table), + [], + "table.fill\t$table, $n, $val, $i", + "table.fill\t$table", + 0xfc11>; + +} + +defm "" : TABLE<FUNCREF>, Requires<[HasReferenceTypes]>; +defm "" : TABLE<EXTERNREF>, Requires<[HasReferenceTypes]>; + +defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table), + (outs), (ins table32_op:$table), + [], + "table.size\t$sz, $table", + "table.size\t$table", + 0xfc10>, + Requires<[HasReferenceTypes]>; + + +defm TABLE_COPY : I<(outs), (ins table32_op:$table1, 
table32_op:$table2, I32:$n, I32:$s, I32:$d), + (outs), (ins table32_op:$table1, table32_op:$table2), + [], + "table.copy\t$table1, $table2, $n, $s, $d", + "table.copy\t$table1, $table2", + 0xfc0e>, + Requires<[HasReferenceTypes]>; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index 346938daf1aa..e07dae65fc4a 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -15,7 +15,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -32,15 +32,17 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override; + bool removeUnreachableEHPads(MachineFunction &MF); void recordCatchRetBBs(MachineFunction &MF); - bool addCatches(MachineFunction &MF); + bool hoistCatches(MachineFunction &MF); + bool addCatchAlls(MachineFunction &MF); bool replaceFuncletReturns(MachineFunction &MF); bool removeUnnecessaryUnreachables(MachineFunction &MF); - bool addExceptionExtraction(MachineFunction &MF); + bool ensureSingleBBTermPads(MachineFunction &MF); bool restoreStackPointer(MachineFunction &MF); MachineBasicBlock *getMatchingEHPad(MachineInstr *MI); - SmallSet<MachineBasicBlock *, 8> CatchRetBBs; + SmallPtrSet<MachineBasicBlock *, 8> CatchRetBBs; public: static char ID; // Pass identification, replacement for typeid @@ -94,15 +96,18 @@ WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) { template <typename Container> static void eraseDeadBBsAndChildren(const Container &MBBs) { SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end()); + 
SmallPtrSet<MachineBasicBlock *, 8> Deleted; while (!WL.empty()) { MachineBasicBlock *MBB = WL.pop_back_val(); - if (!MBB->pred_empty()) + if (Deleted.count(MBB) || !MBB->pred_empty()) continue; - SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(), - MBB->succ_end()); + SmallVector<MachineBasicBlock *, 4> Succs(MBB->successors()); WL.append(MBB->succ_begin(), MBB->succ_end()); for (auto *Succ : Succs) MBB->removeSuccessor(Succ); + // To prevent deleting the same BB multiple times, which can happen when + // 'MBBs' contain both a parent and a child + Deleted.insert(MBB); MBB->eraseFromParent(); } } @@ -118,21 +123,33 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; if (MF.getFunction().hasPersonalityFn()) { + Changed |= removeUnreachableEHPads(MF); recordCatchRetBBs(MF); - Changed |= addCatches(MF); + Changed |= hoistCatches(MF); + Changed |= addCatchAlls(MF); Changed |= replaceFuncletReturns(MF); + Changed |= ensureSingleBBTermPads(MF); } Changed |= removeUnnecessaryUnreachables(MF); - if (MF.getFunction().hasPersonalityFn()) { - Changed |= addExceptionExtraction(MF); + if (MF.getFunction().hasPersonalityFn()) Changed |= restoreStackPointer(MF); - } return Changed; } -// Record which BB ends with 'CATCHRET' instruction, because this will be -// replaced with BRs later. This set of 'CATCHRET' BBs is necessary in -// 'getMatchingEHPad' function. +// Remove unreachable EH pads and its children. If they remain, CFG +// stackification can be tricky. +bool WebAssemblyLateEHPrepare::removeUnreachableEHPads(MachineFunction &MF) { + SmallVector<MachineBasicBlock *, 4> ToDelete; + for (auto &MBB : MF) + if (MBB.isEHPad() && MBB.pred_empty()) + ToDelete.push_back(&MBB); + eraseDeadBBsAndChildren(ToDelete); + return !ToDelete.empty(); +} + +// Record which BB ends with catchret instruction, because this will be replaced +// with 'br's later. This set of catchret BBs is necessary in 'getMatchingEHPad' +// function. 
void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) { CatchRetBBs.clear(); for (auto &MBB : MF) { @@ -145,25 +162,69 @@ void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) { } } -// Add catch instruction to beginning of catchpads and cleanuppads. -bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) { +// Hoist catch instructions to the beginning of their matching EH pad BBs in +// case, +// (1) catch instruction is not the first instruction in EH pad. +// ehpad: +// some_other_instruction +// ... +// %exn = catch 0 +// (2) catch instruction is in a non-EH pad BB. For example, +// ehpad: +// br bb0 +// bb0: +// %exn = catch 0 +bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) { + bool Changed = false; + SmallVector<MachineInstr *, 16> Catches; + for (auto &MBB : MF) + for (auto &MI : MBB) + if (WebAssembly::isCatch(MI.getOpcode())) + Catches.push_back(&MI); + + for (auto *Catch : Catches) { + MachineBasicBlock *EHPad = getMatchingEHPad(Catch); + assert(EHPad && "No matching EH pad for catch"); + auto InsertPos = EHPad->begin(); + // Skip EH_LABELs in the beginning of an EH pad if present. We don't use + // these labels at the moment, but other targets also seem to have an + // EH_LABEL instruction in the beginning of an EH pad. + while (InsertPos != EHPad->end() && InsertPos->isEHLabel()) + InsertPos++; + if (InsertPos == Catch) + continue; + Changed = true; + EHPad->insert(InsertPos, Catch->removeFromParent()); + } + return Changed; +} + +// Add catch_all to beginning of cleanup pads. +bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) { bool Changed = false; const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + for (auto &MBB : MF) { - if (MBB.isEHPad()) { + if (!MBB.isEHPad()) + continue; + auto InsertPos = MBB.begin(); + // Skip EH_LABELs in the beginning of an EH pad if present. 
+ while (InsertPos != MBB.end() && InsertPos->isEHLabel()) + InsertPos++; + // This runs after hoistCatches(), so we assume that if there is a catch, + // that should be the non-EH label first instruction in an EH pad. + if (InsertPos == MBB.end() || + !WebAssembly::isCatch(InsertPos->getOpcode())) { Changed = true; - auto InsertPos = MBB.begin(); - if (InsertPos->isEHLabel()) // EH pad starts with an EH label - ++InsertPos; - Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); - BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(), - TII.get(WebAssembly::CATCH), DstReg); + BuildMI(MBB, InsertPos, InsertPos->getDebugLoc(), + TII.get(WebAssembly::CATCH_ALL)); } } return Changed; } +// Replace pseudo-instructions catchret and cleanupret with br and rethrow +// respectively. bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { bool Changed = false; const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); @@ -185,17 +246,11 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { Changed = true; break; } - case WebAssembly::CLEANUPRET: - case WebAssembly::RETHROW_IN_CATCH: { - // Replace a cleanupret/rethrow_in_catch with a rethrow - auto *EHPad = getMatchingEHPad(TI); - auto CatchPos = EHPad->begin(); - if (CatchPos->isEHLabel()) // EH pad starts with an EH label - ++CatchPos; - MachineInstr *Catch = &*CatchPos; - Register ExnReg = Catch->getOperand(0).getReg(); + case WebAssembly::CLEANUPRET: { + // Replace a cleanupret with a rethrow. For C++ support, currently + // rethrow's immediate argument is always 0 (= the latest exception). BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) - .addReg(ExnReg); + .addImm(0); TI->eraseFromParent(); Changed = true; break; @@ -205,6 +260,7 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { return Changed; } +// Remove unnecessary unreachables after a throw or rethrow. 
bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( MachineFunction &MF) { bool Changed = false; @@ -220,8 +276,7 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( // because throw itself is a terminator, and also delete successors if // any. MBB.erase(std::next(MI.getIterator()), MBB.end()); - SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(), - MBB.succ_end()); + SmallVector<MachineBasicBlock *, 8> Succs(MBB.successors()); for (auto *Succ : Succs) if (!Succ->isEHPad()) MBB.removeSuccessor(Succ); @@ -232,154 +287,78 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( return Changed; } -// Wasm uses 'br_on_exn' instruction to check the tag of an exception. It takes -// exnref type object returned by 'catch', and branches to the destination if it -// matches a given tag. We currently use __cpp_exception symbol to represent the -// tag for all C++ exceptions. +// Clang-generated terminate pads are an single-BB EH pad in the form of +// termpad: +// %exn = catch $__cpp_exception +// call @__clang_call_terminate(%exn) +// unreachable +// (There can be local.set and local.gets before the call if we didn't run +// RegStackify) +// But code transformations can change or add more control flow, so the call to +// __clang_call_terminate() function may not be in the original EH pad anymore. +// This ensures every terminate pad is a single BB in the form illustrated +// above. // -// block $l (result i32) -// ... -// ;; exnref $e is on the stack at this point -// br_on_exn $l $e ;; branch to $l with $e's arguments -// ... -// end -// ;; Here we expect the extracted values are on top of the wasm value stack -// ... Handle exception using values ... -// -// br_on_exn takes an exnref object and branches if it matches the given tag. 
-// There can be multiple br_on_exn instructions if we want to match for another -// tag, but for now we only test for __cpp_exception tag, and if it does not -// match, i.e., it is a foreign exception, we rethrow it. -// -// In the destination BB that's the target of br_on_exn, extracted exception -// values (in C++'s case a single i32, which represents an exception pointer) -// are placed on top of the wasm stack. Because we can't model wasm stack in -// LLVM instruction, we use 'extract_exception' pseudo instruction to retrieve -// it. The pseudo instruction will be deleted later. -bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { +// This is preparation work for the HandleEHTerminatePads pass later, which +// duplicates terminate pads both for 'catch' and 'catch_all'. Refer to +// WebAssemblyHandleEHTerminatePads.cpp for details. +bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) { const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - auto *EHInfo = MF.getWasmEHFuncInfo(); - SmallVector<MachineInstr *, 16> ExtractInstrs; - SmallVector<MachineInstr *, 8> ToDelete; - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (MI.getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) { - if (MI.getOperand(0).isDead()) - ToDelete.push_back(&MI); - else - ExtractInstrs.push_back(&MI); - } - } - } - bool Changed = !ToDelete.empty() || !ExtractInstrs.empty(); - for (auto *MI : ToDelete) - MI->eraseFromParent(); - if (ExtractInstrs.empty()) - return Changed; - - // Find terminate pads. 
- SmallSet<MachineBasicBlock *, 8> TerminatePads; + + // Find calls to __clang_call_terminate() + SmallVector<MachineInstr *, 8> ClangCallTerminateCalls; + SmallPtrSet<MachineBasicBlock *, 8> TermPads; for (auto &MBB : MF) { for (auto &MI : MBB) { if (MI.isCall()) { const MachineOperand &CalleeOp = MI.getOperand(0); if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == - WebAssembly::ClangCallTerminateFn) - TerminatePads.insert(getMatchingEHPad(&MI)); + WebAssembly::ClangCallTerminateFn) { + MachineBasicBlock *EHPad = getMatchingEHPad(&MI); + assert(EHPad && "No matching EH pad for __clang_call_terminate"); + // In case a __clang_call_terminate call is duplicated during code + // transformation so one terminate pad contains multiple + // __clang_call_terminate calls, we only count one of them + if (TermPads.insert(EHPad).second) + ClangCallTerminateCalls.push_back(&MI); + } } } } - for (auto *Extract : ExtractInstrs) { - MachineBasicBlock *EHPad = getMatchingEHPad(Extract); - assert(EHPad && "No matching EH pad for extract_exception"); - auto CatchPos = EHPad->begin(); - if (CatchPos->isEHLabel()) // EH pad starts with an EH label - ++CatchPos; - MachineInstr *Catch = &*CatchPos; - - if (Catch->getNextNode() != Extract) - EHPad->insert(Catch->getNextNode(), Extract->removeFromParent()); - - // - Before: - // ehpad: - // %exnref:exnref = catch - // %exn:i32 = extract_exception - // ... use exn ... - // - // - After: - // ehpad: - // %exnref:exnref = catch - // br_on_exn %thenbb, $__cpp_exception, %exnref - // br %elsebb - // elsebb: - // rethrow - // thenbb: - // %exn:i32 = extract_exception - // ... use exn ... 
- Register ExnReg = Catch->getOperand(0).getReg(); - auto *ThenMBB = MF.CreateMachineBasicBlock(); - auto *ElseMBB = MF.CreateMachineBasicBlock(); - MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB); - MF.insert(std::next(MachineFunction::iterator(ElseMBB)), ThenMBB); - ThenMBB->splice(ThenMBB->end(), EHPad, Extract, EHPad->end()); - ThenMBB->transferSuccessors(EHPad); - EHPad->addSuccessor(ThenMBB); - EHPad->addSuccessor(ElseMBB); - - DebugLoc DL = Extract->getDebugLoc(); - const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception"); - BuildMI(EHPad, DL, TII.get(WebAssembly::BR_ON_EXN)) - .addMBB(ThenMBB) - .addExternalSymbol(CPPExnSymbol) - .addReg(ExnReg); - BuildMI(EHPad, DL, TII.get(WebAssembly::BR)).addMBB(ElseMBB); - - // When this is a terminate pad with __clang_call_terminate() call, we don't - // rethrow it anymore and call __clang_call_terminate() with a nullptr - // argument, which will call std::terminate(). - // - // - Before: - // ehpad: - // %exnref:exnref = catch - // %exn:i32 = extract_exception - // call @__clang_call_terminate(%exn) - // unreachable - // - // - After: - // ehpad: - // %exnref:exnref = catch - // br_on_exn %thenbb, $__cpp_exception, %exnref - // br %elsebb - // elsebb: - // call @__clang_call_terminate(0) - // unreachable - // thenbb: - // %exn:i32 = extract_exception - // call @__clang_call_terminate(%exn) - // unreachable - if (TerminatePads.count(EHPad)) { - Function *ClangCallTerminateFn = - MF.getFunction().getParent()->getFunction( - WebAssembly::ClangCallTerminateFn); - assert(ClangCallTerminateFn && - "There is no __clang_call_terminate() function"); - Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0); - BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL)) - .addGlobalAddress(ClangCallTerminateFn) - .addReg(Reg); - BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE)); - - } else { - BuildMI(ElseMBB, DL, 
TII.get(WebAssembly::RETHROW)).addReg(ExnReg); - if (EHInfo->hasEHPadUnwindDest(EHPad)) - ElseMBB->addSuccessor(EHInfo->getEHPadUnwindDest(EHPad)); - } - } + bool Changed = false; + for (auto *Call : ClangCallTerminateCalls) { + MachineBasicBlock *EHPad = getMatchingEHPad(Call); + assert(EHPad && "No matching EH pad for __clang_call_terminate"); + + // If it is already the form we want, skip it + if (Call->getParent() == EHPad && + Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE) + continue; - return true; + // In case the __clang_call_terminate() call is not in its matching EH pad, + // move the call to the end of EH pad and add an unreachable instruction + // after that. Delete all successors and their children if any, because here + // the program terminates. + Changed = true; + // This runs after hoistCatches(), so catch instruction should be at the top + MachineInstr *Catch = WebAssembly::findCatch(EHPad); + assert(Catch && "EH pad does not have a catch instruction"); + // Takes the result register of the catch instruction as argument. There may + // have been some other local.set/local.gets in between, but at this point + // we don't care. 
+ Call->getOperand(1).setReg(Catch->getOperand(0).getReg()); + auto InsertPos = std::next(MachineBasicBlock::iterator(Catch)); + EHPad->insert(InsertPos, Call->removeFromParent()); + BuildMI(*EHPad, InsertPos, Call->getDebugLoc(), + TII.get(WebAssembly::UNREACHABLE)); + EHPad->erase(InsertPos, EHPad->end()); + SmallVector<MachineBasicBlock *, 8> Succs(EHPad->successors()); + for (auto *Succ : Succs) + EHPad->removeSuccessor(Succ); + eraseDeadBBsAndChildren(Succs); + } + return Changed; } // After the stack is unwound due to a thrown exception, the __stack_pointer @@ -406,7 +385,7 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { auto InsertPos = MBB.begin(); if (InsertPos->isEHLabel()) // EH pad starts with an EH label ++InsertPos; - if (InsertPos->getOpcode() == WebAssembly::CATCH) + if (WebAssembly::isCatch(InsertPos->getOpcode())) ++InsertPos; FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, InsertPos, MBB.begin()->getDebugLoc()); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 5fce4a600510..d3bbadf27478 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -140,8 +140,7 @@ /// 1) Lower /// longjmp(buf, value) /// into -/// emscripten_longjmp_jmpbuf(buf, value) -/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later. +/// emscripten_longjmp(buf, value) /// /// In case calls to setjmp() exists /// @@ -196,19 +195,16 @@ /// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to /// each setjmp callsite. Label 0 means this longjmp buffer does not /// correspond to one of the setjmp callsites in this function, so in this -/// case we just chain the longjmp to the caller. 
(Here we call -/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf. -/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while -/// emscripten_longjmp takes an int. Both of them will eventually be lowered -/// to emscripten_longjmp in s2wasm, but here we need two signatures - we -/// can't translate an int value to a jmp_buf.) -/// Label -1 means no longjmp occurred. Otherwise we jump to the right -/// post-setjmp BB based on the label. +/// case we just chain the longjmp to the caller. Label -1 means no longjmp +/// occurred. Otherwise we jump to the right post-setjmp BB based on the +/// label. /// ///===----------------------------------------------------------------------===// #include "WebAssembly.h" +#include "WebAssemblyTargetMachine.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -239,7 +235,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { Function *ResumeF = nullptr; Function *EHTypeIDF = nullptr; Function *EmLongjmpF = nullptr; - Function *EmLongjmpJmpbufF = nullptr; Function *SaveSetjmpF = nullptr; Function *TestSetjmpF = nullptr; @@ -314,13 +309,23 @@ static bool canThrow(const Value *V) { // Get a global variable with the given name. If it doesn't exist declare it, // which will generate an import and asssumes that it will exist at link time. static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB, + WebAssemblyTargetMachine &TM, const char *Name) { - - auto *GV = - dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); + auto Int32Ty = IRB.getInt32Ty(); + auto *GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, Int32Ty)); if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); + // If the target supports TLS, make this variable thread-local. 
We can't just + // unconditionally make it thread-local and depend on + // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has + // the side effect of disallowing the object from being linked into a + // shared-memory module, which we don't want to be responsible for. + auto *Subtarget = TM.getSubtargetImpl(); + auto TLS = Subtarget->hasAtomics() && Subtarget->hasBulkMemory() + ? GlobalValue::LocalExecTLSModel + : GlobalValue::NotThreadLocal; + GV->setThreadLocalMode(TLS); return GV; } @@ -338,7 +343,7 @@ static std::string getSignature(FunctionType *FTy) { if (FTy->isVarArg()) OS << "_..."; Sig = OS.str(); - Sig.erase(remove_if(Sig, isSpace), Sig.end()); + erase_if(Sig, isSpace); // When s2wasm parses .s file, a comma means the end of an argument. So a // mangled function name can contain any character but a comma. std::replace(Sig.begin(), Sig.end(), ',', '.'); @@ -630,6 +635,40 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { } } +// Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes +// arguments of type {i32, i32} and longjmp takes {jmp_buf*, i32}, so we need a +// ptrtoint instruction here to make the type match. jmp_buf* will eventually be +// lowered to i32 in the wasm backend. 
+static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF, + Function *EmLongjmpF) { + SmallVector<CallInst *, 8> ToErase; + LLVMContext &C = LongjmpF->getParent()->getContext(); + IRBuilder<> IRB(C); + + // For calls to longjmp, replace it with emscripten_longjmp and cast its first + // argument (jmp_buf*) to int + for (User *U : LongjmpF->users()) { + auto *CI = dyn_cast<CallInst>(U); + if (CI && CI->getCalledFunction() == LongjmpF) { + IRB.SetInsertPoint(CI); + Value *Jmpbuf = + IRB.CreatePtrToInt(CI->getArgOperand(0), IRB.getInt32Ty(), "jmpbuf"); + IRB.CreateCall(EmLongjmpF, {Jmpbuf, CI->getArgOperand(1)}); + ToErase.push_back(CI); + } + } + for (auto *I : ToErase) + I->eraseFromParent(); + + // If we have any remaining uses of longjmp's function pointer, replace it + // with (int(*)(jmp_buf*, int))emscripten_longjmp. + if (!LongjmpF->uses().empty()) { + Value *EmLongjmp = + IRB.CreateBitCast(EmLongjmpF, LongjmpF->getType(), "em_longjmp"); + LongjmpF->replaceAllUsesWith(EmLongjmp); + } +} + bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n"); @@ -642,11 +681,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed); + if ((EnableEH || DoSjLj) && + Triple(M.getTargetTriple()).getArch() == Triple::wasm64) + report_fatal_error("Emscripten EH/SjLj is not supported with wasm64 yet"); + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + assert(TPC && "Expected a TargetPassConfig"); + auto &TM = TPC->getTM<WebAssemblyTargetMachine>(); + // Declare (or get) global variables __THREW__, __threwValue, and // getTempRet0/setTempRet0 function which are used in common for both // exception handling and setjmp/longjmp handling - ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__"); - ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue"); + 
ThrewGV = getGlobalVariableI32(M, IRB, TM, "__THREW__"); + ThrewValueGV = getGlobalVariableI32(M, IRB, TM, "__threwValue"); GetTempRet0Func = getEmscriptenFunction( FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M); SetTempRet0Func = getEmscriptenFunction( @@ -680,22 +727,21 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { if (DoSjLj) { Changed = true; // We have setjmp or longjmp somewhere - if (LongjmpF) { - // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is - // defined in JS code - EmLongjmpJmpbufF = getEmscriptenFunction(LongjmpF->getFunctionType(), - "emscripten_longjmp_jmpbuf", &M); - LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); - } + // Register emscripten_longjmp function + FunctionType *FTy = FunctionType::get( + IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); + EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); + + if (LongjmpF) + replaceLongjmpWithEmscriptenLongjmp(LongjmpF, EmLongjmpF); if (SetjmpF) { // Register saveSetjmp function FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); - FunctionType *FTy = - FunctionType::get(Type::getInt32PtrTy(C), - {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), - Type::getInt32PtrTy(C), IRB.getInt32Ty()}, - false); + FTy = FunctionType::get(Type::getInt32PtrTy(C), + {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), + Type::getInt32PtrTy(C), IRB.getInt32Ty()}, + false); SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M); // Register testSetjmp function @@ -704,10 +750,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}, false); TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M); - FTy = FunctionType::get(IRB.getVoidTy(), - {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); - EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); - // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in 
every function SmallPtrSet<Function *, 8> SetjmpUsers; @@ -769,7 +811,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { } else { // This can't throw, and we don't need this invoke, just replace it with a // call+branch - SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end()); + SmallVector<Value *, 16> Args(II->args()); CallInst *NewCall = IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args); NewCall->takeName(II); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 304dca2ebfe4..86d59ef807ab 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -38,29 +38,34 @@ cl::opt<bool> " instruction output for test purposes only."), cl::init(false)); +extern cl::opt<bool> EnableEmException; +extern cl::opt<bool> EnableEmSjLj; + static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI); MCSymbol * WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { const GlobalValue *Global = MO.getGlobal(); - auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global)); - - if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) { - const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); - const TargetMachine &TM = MF.getTarget(); - const Function &CurrentFunc = MF.getFunction(); - - SmallVector<MVT, 1> ResultMVTs; - SmallVector<MVT, 4> ParamMVTs; - const auto *const F = dyn_cast<Function>(Global); - computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs); - - auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); - WasmSym->setSignature(Signature.get()); - Printer.addSignature(std::move(Signature)); - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); - } - + if (!isa<Function>(Global)) + return 
cast<MCSymbolWasm>(Printer.getSymbol(Global)); + + const auto *FuncTy = cast<FunctionType>(Global->getValueType()); + const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); + const TargetMachine &TM = MF.getTarget(); + const Function &CurrentFunc = MF.getFunction(); + + SmallVector<MVT, 1> ResultMVTs; + SmallVector<MVT, 4> ParamMVTs; + const auto *const F = dyn_cast<Function>(Global); + computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs); + auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); + + bool InvokeDetected = false; + auto *WasmSym = Printer.getMCSymbolForFunction( + F, EnableEmException || EnableEmSjLj, Signature.get(), InvokeDetected); + WasmSym->setSignature(Signature.get()); + Printer.addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); return WasmSym; } @@ -134,6 +139,9 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO, case WebAssemblyII::MO_MEMORY_BASE_REL: Kind = MCSymbolRefExpr::VK_WASM_MBREL; break; + case WebAssemblyII::MO_TLS_BASE_REL: + Kind = MCSymbolRefExpr::VK_WASM_TLSREL; + break; case WebAssemblyII::MO_TABLE_BASE_REL: Kind = MCSymbolRefExpr::VK_WASM_TBREL; break; @@ -266,6 +274,11 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, SmallVector<wasm::ValType, 4>()); break; } + } else if (Info.OperandType == WebAssembly::OPERAND_HEAPTYPE) { + assert(static_cast<WebAssembly::HeapType>(MO.getImm()) != + WebAssembly::HeapType::Invalid); + // With typed function references, this will need a case for type + // index operands. Otherwise, fall through. 
} } MCOp = MCOperand::createImm(MO.getImm()); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index a2da0ea849e0..6bfed1a7195c 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -97,7 +97,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( // values through live-range splitting and stackification, it will have to // do. MF.getInfo<WebAssemblyFunctionInfo>()->setFrameBaseVreg( - SplitLIs.back()->reg); + SplitLIs.back()->reg()); } SplitLIs.clear(); } diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index a587c9d23d2b..ba1c4b7233f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -111,8 +111,11 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, case WebAssembly::V128RegClassID: CopyLocalOpc = WebAssembly::COPY_V128; break; - case WebAssembly::EXNREFRegClassID: - CopyLocalOpc = WebAssembly::COPY_EXNREF; + case WebAssembly::FUNCREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_FUNCREF; + break; + case WebAssembly::EXTERNREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_EXTERNREF; break; default: llvm_unreachable("Unexpected register class for return operand"); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 20fe2b2b7bfc..fe127dec8aed 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -106,8 +106,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { continue; LiveInterval *LI = &Liveness->getInterval(VReg); - assert(LI->weight == 0.0f); - LI->weight = computeWeight(MRI, MBFI, VReg); + assert(LI->weight() == 0.0f); + LI->setWeight(computeWeight(MRI, MBFI, VReg)); LLVM_DEBUG(LI->dump()); SortedIntervals.push_back(LI); } @@ -118,10 +118,10 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // TODO: Investigate more intelligent sorting heuristics. For starters, we // should try to coalesce adjacent live intervals before non-adjacent ones. llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) { - if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg)) - return MRI->isLiveIn(LHS->reg); - if (LHS->weight != RHS->weight) - return LHS->weight > RHS->weight; + if (MRI->isLiveIn(LHS->reg()) != MRI->isLiveIn(RHS->reg())) + return MRI->isLiveIn(LHS->reg()); + if (LHS->weight() != RHS->weight()) + return LHS->weight() > RHS->weight(); if (LHS->empty() || RHS->empty()) return !LHS->empty() && RHS->empty(); return *LHS < *RHS; @@ -135,14 +135,14 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { LiveInterval *LI = SortedIntervals[I]; - unsigned Old = LI->reg; + unsigned Old = LI->reg(); size_t Color = I; const TargetRegisterClass *RC = MRI->getRegClass(Old); // Check if it's possible to reuse any of the used colors. 
if (!MRI->isLiveIn(Old)) for (unsigned C : UsedColors.set_bits()) { - if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) + if (MRI->getRegClass(SortedIntervals[C]->reg()) != RC) continue; for (LiveInterval *OtherLI : Assignments[C]) if (!OtherLI->empty() && OtherLI->overlaps(*LI)) @@ -152,7 +152,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { continue_outer:; } - unsigned New = SortedIntervals[Color]->reg; + unsigned New = SortedIntervals[Color]->reg(); SlotMapping[I] = New; Changed |= Old != New; UsedColors.set(Color); @@ -160,7 +160,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // If we reassigned the stack pointer, update the debug frame base info. if (Old != New && MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Old) MFI.setFrameBaseVreg(New); - LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) + LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg()) << " to vreg" << Register::virtReg2Index(New) << "\n"); } if (!Changed) @@ -168,7 +168,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // Rewrite register operands. 
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { - unsigned Old = SortedIntervals[I]->reg; + unsigned Old = SortedIntervals[I]->reg(); unsigned New = SlotMapping[I]; if (Old != New) MRI->replaceRegWith(Old, New); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 1d4e2e3a8f9e..d474b9a2c1ee 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -123,7 +123,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, } else if (RegClass == &WebAssembly::V128RegClass) { // TODO: Replace this with v128.const 0 once that is supported in V8 Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); + MI->setDesc(TII->get(WebAssembly::SPLAT_I32x4)); MI->addOperand(MachineOperand::CreateReg(TempReg, false)); MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(WebAssembly::CONST_I32), TempReg) @@ -342,7 +342,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, // instruction in which the current value is used, we cannot // stackify. Stackifying in this case would require that def moving below the // current def in the stack, which cannot be achieved, even with locals. - for (const auto &SubsequentDef : drop_begin(DefI->defs(), 1)) { + for (const auto &SubsequentDef : drop_begin(DefI->defs())) { for (const auto &PriorUse : UseI->uses()) { if (&PriorUse == Use) break; @@ -359,10 +359,9 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, if (NextI == Insert) return true; - // 'catch' and 'extract_exception' should be the first instruction of a BB and - // cannot move. 
- if (DefI->getOpcode() == WebAssembly::CATCH || - DefI->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) + // 'catch' and 'catch_all' should be the first instruction of a BB and cannot + // move. + if (WebAssembly::isCatch(DefI->getOpcode())) return false; // Check for register dependencies. @@ -595,7 +594,7 @@ static MachineInstr *rematerializeCheapDef( if (IsDead) { LLVM_DEBUG(dbgs() << " - Deleting original\n"); SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot(); - LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx); + LIS.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS), Idx); LIS.removeInterval(Reg); LIS.RemoveMachineInstrFromMaps(Def); Def.eraseFromParent(); @@ -693,7 +692,7 @@ class TreeWalkerState { public: explicit TreeWalkerState(MachineInstr *Insert) { const iterator_range<mop_iterator> &Range = Insert->explicit_uses(); - if (Range.begin() != Range.end()) + if (!Range.empty()) Worklist.push_back(reverse(Range)); } @@ -702,11 +701,10 @@ public: MachineOperand &pop() { RangeTy &Range = Worklist.back(); MachineOperand &Op = *Range.begin(); - Range = drop_begin(Range, 1); - if (Range.begin() == Range.end()) + Range = drop_begin(Range); + if (Range.empty()) Worklist.pop_back(); - assert((Worklist.empty() || - Worklist.back().begin() != Worklist.back().end()) && + assert((Worklist.empty() || !Worklist.back().empty()) && "Empty ranges shouldn't remain in the worklist"); return Op; } @@ -714,7 +712,7 @@ public: /// Push Instr's operands onto the stack to be visited. 
void pushOperands(MachineInstr *Instr) { const iterator_range<mop_iterator> &Range(Instr->explicit_uses()); - if (Range.begin() != Range.end()) + if (!Range.empty()) Worklist.push_back(reverse(Range)); } @@ -733,7 +731,7 @@ public: if (Worklist.empty()) return false; const RangeTy &Range = Worklist.back(); - return Range.begin() != Range.end() && Range.begin()->getParent() == Instr; + return !Range.empty() && Range.begin()->getParent() == Instr; } /// Test whether the given register is present on the stack, indicating an @@ -865,24 +863,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (WebAssembly::isArgument(DefI->getOpcode())) continue; - // Currently catch's return value register cannot be stackified, because - // the wasm LLVM backend currently does not support live-in values - // entering blocks, which is a part of multi-value proposal. - // - // Once we support live-in values of wasm blocks, this can be: - // catch ; push exnref value onto stack - // block exnref -> i32 - // br_on_exn $__cpp_exception ; pop the exnref value - // end_block - // - // But because we don't support it yet, the catch instruction's dst - // register should be assigned to a local to be propagated across - // 'block' boundary now. 
- // - // TODO: Fix this once we support the multivalue blocks - if (DefI->getOpcode() == WebAssembly::CATCH) - continue; - MachineOperand *Def = DefI->findRegisterDefOperand(Reg); assert(Def != nullptr); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 6d3d6c723277..ba2936b492a9 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -43,7 +43,8 @@ def F64_0 : WebAssemblyReg<"%f64.0">; def V128_0: WebAssemblyReg<"%v128">; -def EXNREF_0 : WebAssemblyReg<"%exnref.0">; +def FUNCREF_0 : WebAssemblyReg<"%funcref.0">; +def EXTERNREF_0 : WebAssemblyReg<"%externref.0">; // The value stack "register". This is an opaque entity which serves to order // uses and defs that must remain in LIFO order. @@ -64,4 +65,5 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128, (add V128_0)>; -def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>; +def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>; +def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>; diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp new file mode 100644 index 000000000000..cd84e68aed14 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp @@ -0,0 +1,78 @@ +#include "WebAssemblySortRegion.h" +#include "WebAssemblyExceptionInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" + +using namespace llvm; +using namespace WebAssembly; + +namespace llvm { +namespace WebAssembly { +template <> +bool ConcreteSortRegion<MachineLoop>::isLoop() const { + return true; 
+} +} // end namespace WebAssembly +} // end namespace llvm + +const SortRegion *SortRegionInfo::getRegionFor(const MachineBasicBlock *MBB) { + const auto *ML = MLI.getLoopFor(MBB); + const auto *WE = WEI.getExceptionFor(MBB); + if (!ML && !WE) + return nullptr; + // We determine subregion relationship by domination of their headers, i.e., + // if region A's header dominates region B's header, B is a subregion of A. + // WebAssemblyException contains BBs in all its subregions (loops or + // exceptions), but MachineLoop may not, because MachineLoop does not + // contain BBs that don't have a path to its header even if they are + // dominated by its header. So here we should use + // WE->contains(ML->getHeader()), but not ML->contains(WE->getHeader()). + if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) { + // If the smallest region containing MBB is a loop + if (LoopMap.count(ML)) + return LoopMap[ML].get(); + LoopMap[ML] = std::make_unique<ConcreteSortRegion<MachineLoop>>(ML); + return LoopMap[ML].get(); + } else { + // If the smallest region containing MBB is an exception + if (ExceptionMap.count(WE)) + return ExceptionMap[WE].get(); + ExceptionMap[WE] = + std::make_unique<ConcreteSortRegion<WebAssemblyException>>(WE); + return ExceptionMap[WE].get(); + } +} + +MachineBasicBlock *SortRegionInfo::getBottom(const SortRegion *R) { + if (R->isLoop()) + return getBottom(MLI.getLoopFor(R->getHeader())); + else + return getBottom(WEI.getExceptionFor(R->getHeader())); +} + +MachineBasicBlock *SortRegionInfo::getBottom(const MachineLoop *ML) { + MachineBasicBlock *Bottom = ML->getHeader(); + for (MachineBasicBlock *MBB : ML->blocks()) { + if (MBB->getNumber() > Bottom->getNumber()) + Bottom = MBB; + // MachineLoop does not contain all BBs dominated by its header. BBs that + // don't have a path back to the loop header aren't included. 
But for the + // purpose of CFG sorting and stackification, we need a bottom BB among all + // BBs that are dominated by the loop header. So we check if there is any + // WebAssemblyException contained in this loop, and compute the bottommost + // BB of them all. + if (MBB->isEHPad()) { + MachineBasicBlock *ExBottom = getBottom(WEI.getExceptionFor(MBB)); + if (ExBottom->getNumber() > Bottom->getNumber()) + Bottom = ExBottom; + } + } + return Bottom; +} + +MachineBasicBlock *SortRegionInfo::getBottom(const WebAssemblyException *WE) { + MachineBasicBlock *Bottom = WE->getHeader(); + for (MachineBasicBlock *MBB : WE->blocks()) + if (MBB->getNumber() > Bottom->getNumber()) + Bottom = MBB; + return Bottom; +} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h new file mode 100644 index 000000000000..e92bf1764185 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h @@ -0,0 +1,91 @@ +//===-- WebAssemblySortRegion.h - WebAssembly Sort SortRegion ----*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements regions used in CFGSort and CFGStackify. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSORTREGION_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSORTREGION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineLoop; +class MachineLoopInfo; +class WebAssemblyException; +class WebAssemblyExceptionInfo; + +namespace WebAssembly { + +// Wrapper for loops and exceptions +class SortRegion { +public: + virtual ~SortRegion() = default; + virtual MachineBasicBlock *getHeader() const = 0; + virtual bool contains(const MachineBasicBlock *MBB) const = 0; + virtual unsigned getNumBlocks() const = 0; + using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator; + virtual iterator_range<block_iterator> blocks() const = 0; + virtual bool isLoop() const = 0; +}; + +template <typename T> class ConcreteSortRegion : public SortRegion { + const T *Unit; + +public: + ConcreteSortRegion(const T *Unit) : Unit(Unit) {} + MachineBasicBlock *getHeader() const override { return Unit->getHeader(); } + bool contains(const MachineBasicBlock *MBB) const override { + return Unit->contains(MBB); + } + unsigned getNumBlocks() const override { return Unit->getNumBlocks(); } + iterator_range<block_iterator> blocks() const override { + return Unit->blocks(); + } + bool isLoop() const override { return false; } +}; + +// This class has information of nested SortRegions; this is analogous to what +// LoopInfo is for loops. 
+class SortRegionInfo { + friend class ConcreteSortRegion<MachineLoopInfo>; + friend class ConcreteSortRegion<WebAssemblyException>; + + const MachineLoopInfo &MLI; + const WebAssemblyExceptionInfo &WEI; + DenseMap<const MachineLoop *, std::unique_ptr<SortRegion>> LoopMap; + DenseMap<const WebAssemblyException *, std::unique_ptr<SortRegion>> + ExceptionMap; + +public: + SortRegionInfo(const MachineLoopInfo &MLI, + const WebAssemblyExceptionInfo &WEI) + : MLI(MLI), WEI(WEI) {} + + // Returns the smallest loop or exception that contains MBB + const SortRegion *getRegionFor(const MachineBasicBlock *MBB); + + // Return the "bottom" block among all blocks dominated by the region + // (MachineLoop or WebAssemblyException) header. This works when the entity is + // discontiguous. + MachineBasicBlock *getBottom(const SortRegion *R); + MachineBasicBlock *getBottom(const MachineLoop *ML); + MachineBasicBlock *getBottom(const WebAssemblyException *WE); +}; + +} // end namespace WebAssembly + +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index cacf5ab078a0..7943e1ecc8e1 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -33,7 +33,7 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, if (CPU.empty()) CPU = "generic"; - ParseSubtargetFeatures(CPU, FS); + ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); return *this; } @@ -41,9 +41,10 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : WebAssemblyGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), - FrameLowering(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TSInfo(), TLInfo(TM, *this) {} + : WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), 
+ TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TSInfo(), + TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableAtomicExpand() const { // If atomics are disabled, atomic ops are lowered instead of expanded diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 8b95a3ddb837..a1c872ef2135 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -105,7 +105,7 @@ public: /// Parses features string setting specified subtarget options. Definition of /// function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 7bf655c925a4..135055a43afc 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -34,13 +34,13 @@ using namespace llvm; #define DEBUG_TYPE "wasm" // Emscripten's asm.js-style exception handling -static cl::opt<bool> EnableEmException( +cl::opt<bool> EnableEmException( "enable-emscripten-cxx-exceptions", cl::desc("WebAssembly Emscripten-style exception handling"), cl::init(false)); // Emscripten's asm.js-style setjmp/longjmp handling -static cl::opt<bool> EnableEmSjLj( +cl::opt<bool> EnableEmSjLj( "enable-emscripten-sjlj", cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), cl::init(false)); @@ -145,6 +145,11 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor. 
+const WebAssemblySubtarget *WebAssemblyTargetMachine::getSubtargetImpl() const { + return getSubtargetImpl(std::string(getTargetCPU()), + std::string(getTargetFeatureString())); +} + const WebAssemblySubtarget * WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, std::string FS) const { @@ -160,12 +165,10 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = !CPUAttr.hasAttribute(Attribute::None) - ? CPUAttr.getValueAsString().str() - : TargetCPU; - std::string FS = !FSAttr.hasAttribute(Attribute::None) - ? FSAttr.getValueAsString().str() - : TargetFS; + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the @@ -193,6 +196,7 @@ public: FeatureBitset Features = coalesceFeatures(M); std::string FeatureStr = getFeatureString(Features); + WasmTM->setTargetFeatureString(FeatureStr); for (auto &F : M) replaceFeatures(F, FeatureStr); @@ -273,10 +277,9 @@ private: bool stripThreadLocals(Module &M) { bool Stripped = false; for (auto &GV : M.globals()) { - if (GV.getThreadLocalMode() != - GlobalValue::ThreadLocalMode::NotThreadLocal) { + if (GV.isThreadLocal()) { Stripped = true; - GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal); + GV.setThreadLocal(false); } } return Stripped; @@ -323,10 +326,10 @@ public: void addPreEmitPass() override; // No reg alloc - bool addRegAssignmentFast() override { return false; } + bool addRegAssignAndRewriteFast() override { return false; } // No reg alloc - bool addRegAssignmentOptimized() override { return false; } + bool addRegAssignAndRewriteOptimized() override { return false; } }; } // end anonymous namespace @@ 
-350,7 +353,7 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { //===----------------------------------------------------------------------===// void WebAssemblyPassConfig::addIRPasses() { - // Runs LowerAtomicPass if necessary + // Lower atomics and TLS if necessary addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); // This is a no-op if atomics are not used in the module @@ -443,7 +446,8 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Do various transformations for exception handling. // Every CFG-changing optimizations should come before this. - addPass(createWebAssemblyLateEHPrepare()); + if (TM->Options.ExceptionModel == ExceptionHandling::Wasm) + addPass(createWebAssemblyLateEHPrepare()); // Now that we have a prologue and epilogue and all frame indices are // rewritten, eliminate SP and FP. This allows them to be stackified, diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index dd5b39773313..29e968bfe8eb 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -33,6 +33,7 @@ public: ~WebAssemblyTargetMachine() override; + const WebAssemblySubtarget *getSubtargetImpl() const; const WebAssemblySubtarget *getSubtargetImpl(std::string CPU, std::string FS) const; const WebAssemblySubtarget * diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 28703a2787e0..be1cfbaef3e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -84,3 +84,21 @@ unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, 
Type *Val, return Cost; } + +bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + // Allow inlining only when the Callee has a subset of the Caller's + // features. In principle, we should be able to inline regardless of any + // features because WebAssembly supports features at module granularity, not + // function granularity, but without this restriction it would be possible for + // a module to "forget" about features if all the functions that used them + // were inlined. + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + return (CallerBits & CalleeBits) == CalleeBits; +} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 79588a9f5669..41e358c159b4 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -67,6 +67,9 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); /// @} + + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index bc2bb4fd6935..f8fb57d8a461 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -15,6 +15,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/MC/MCContext.h" using namespace llvm; const 
char *const WebAssembly::ClangCallTerminateFn = "__clang_call_terminate"; @@ -96,3 +97,35 @@ const MachineOperand &WebAssembly::getCalleeOp(const MachineInstr &MI) { llvm_unreachable("Not a call instruction"); } } + +MCSymbolWasm * +WebAssembly::getOrCreateFunctionTableSymbol(MCContext &Ctx, + const StringRef &Name) { + // FIXME: Duplicates functionality from + // MC/WasmObjectWriter::recordRelocation. + MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name)); + if (Sym) { + if (!Sym->isFunctionTable()) + Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table"); + } else { + Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name)); + Sym->setFunctionTable(); + // The default function table is synthesized by the linker. + Sym->setUndefined(); + } + return Sym; +} + +// Find a catch instruction from an EH pad. +MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) { + assert(EHPad->isEHPad()); + auto Pos = EHPad->begin(); + // Skip any label or debug instructions. Also skip 'end' marker instructions + // that may exist after marker placement in CFGStackify. 
+ while (Pos != EHPad->end() && + (Pos->isLabel() || Pos->isDebugInstr() || isMarker(Pos->getOpcode()))) + Pos++; + if (Pos != EHPad->end() && WebAssembly::isCatch(Pos->getOpcode())) + return &*Pos; + return nullptr; +} diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 4f0ed43a2481..41ad7869cf46 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -15,10 +15,14 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H -#include "llvm/CodeGen/MachineBasicBlock.h" - namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class MachineOperand; +class MCContext; +class MCSymbolWasm; +class StringRef; class WebAssemblyFunctionInfo; namespace WebAssembly { @@ -33,21 +37,19 @@ extern const char *const CxaRethrowFn; extern const char *const StdTerminateFn; extern const char *const PersonalityWrapperFn; -/// Return the "bottom" block of an entity, which can be either a MachineLoop or -/// WebAssemblyException. This differs from MachineLoop::getBottomBlock in that -/// it works even if the entity is discontiguous. -template <typename T> MachineBasicBlock *getBottom(const T *Unit) { - MachineBasicBlock *Bottom = Unit->getHeader(); - for (MachineBasicBlock *MBB : Unit->blocks()) - if (MBB->getNumber() > Bottom->getNumber()) - Bottom = MBB; - return Bottom; -} - /// Returns the operand number of a callee, assuming the argument is a call /// instruction. const MachineOperand &getCalleeOp(const MachineInstr &MI); +/// Returns the function table symbol named Name in Ctx, creating it (undefined) +/// if absent. Reports an error if an existing symbol is not a function table. +MCSymbolWasm *getOrCreateFunctionTableSymbol(MCContext &Ctx, + const StringRef &Name); + +/// Find a catch instruction from an EH pad. 
Returns null if no catch +/// instruction is found or the catch is in an invalid location. +MachineInstr *findCatch(MachineBasicBlock *EHPad); + } // end namespace WebAssembly } // end namespace llvm |