author     Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/WebAssembly
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Target/WebAssembly')
75 files changed, 3127 insertions, 2213 deletions
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index ea99cee3eb3bf..e29d85d7588d2 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -164,6 +164,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
   // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
   std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+  std::vector<std::unique_ptr<std::string>> Names;

   // Order of labels, directives and instructions in a .s file have no
   // syntactical enforcement. This class is a callback from the actual parser,
@@ -214,6 +215,11 @@ public:
                      SMLoc & /*EndLoc*/) override {
     llvm_unreachable("ParseRegister is not implemented.");
   }
+  OperandMatchResultTy tryParseRegister(unsigned & /*RegNo*/,
+                                        SMLoc & /*StartLoc*/,
+                                        SMLoc & /*EndLoc*/) override {
+    llvm_unreachable("tryParseRegister is not implemented.");
+  }

   bool error(const Twine &Msg, const AsmToken &Tok) {
     return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
@@ -227,6 +233,12 @@ public:
     Signatures.push_back(std::move(Sig));
   }

+  StringRef storeName(StringRef Name) {
+    std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
+    Names.push_back(std::move(N));
+    return *Names.back();
+  }
+
   std::pair<StringRef, StringRef> nestingString(NestingType NT) {
     switch (NT) {
     case Function:
@@ -310,6 +322,8 @@ public:
       return wasm::ValType::V128;
     if (Type == "exnref")
       return wasm::ValType::EXNREF;
+    if (Type == "externref")
+      return wasm::ValType::EXTERNREF;
     return Optional<wasm::ValType>();
   }

@@ -430,7 +444,7 @@ public:
     Name = StringRef(NameLoc.getPointer(), Name.size());

     // WebAssembly has instructions with / in them, which AsmLexer parses
-    // as seperate tokens, so if we find such tokens immediately adjacent (no
+    // as separate tokens, so if we find such tokens immediately adjacent (no
     // whitespace), expand the name to include them:
     for (;;) {
       auto &Sep = Lexer.getTok();
@@ -688,7 +702,7 @@ public:
     // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
     // TODO: would be good to factor this into a common function, but the
     // assembler and backend really don't share any common code, and this code
-    // parses the locals seperately.
+    // parses the locals separately.
     auto SymName = expectIdent();
     if (SymName.empty())
       return true;
@@ -720,7 +734,7 @@ public:
       return true;
     auto ExportName = expectIdent();
     auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
-    WasmSym->setExportName(ExportName);
+    WasmSym->setExportName(storeName(ExportName));
     TOut.emitExportName(WasmSym, ExportName);
   }

@@ -732,7 +746,7 @@ public:
       return true;
     auto ImportModule = expectIdent();
     auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
-    WasmSym->setImportModule(ImportModule);
+    WasmSym->setImportModule(storeName(ImportModule));
     TOut.emitImportModule(WasmSym, ImportModule);
   }

@@ -744,7 +758,7 @@ public:
       return true;
     auto ImportName = expectIdent();
     auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
-    WasmSym->setImportName(ImportName);
+    WasmSym->setImportName(storeName(ImportName));
     TOut.emitImportName(WasmSym, ImportName);
   }

@@ -787,7 +801,7 @@ public:
       return error("Cannot parse .int expression: ", Lexer.getTok());
     size_t NumBits = 0;
     DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
-    Out.EmitValue(Val, NumBits / 8, End);
+    Out.emitValue(Val, NumBits / 8, End);
     return expect(AsmToken::EndOfStatement, "EOL");
   }

@@ -796,7 +810,7 @@ public:
     std::string S;
     if (Parser.parseEscapedString(S))
       return error("Cannot parse string constant: ", Lexer.getTok());
-    Out.EmitBytes(StringRef(S.c_str(), S.length() + 1));
+    Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
     return expect(AsmToken::EndOfStatement, "EOL");
   }

@@ -834,7 +848,17 @@ public:
       if (Op0.getImm() == -1)
         Op0.setImm(Align);
     }
-    Out.EmitInstruction(Inst, getSTI());
+    if (getSTI().getTargetTriple().isArch64Bit()) {
+      // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
+      // an offset64 arg instead of offset32, but to the assembler matcher
+      // they're both immediates so don't get selected for.
+      auto Opc64 = WebAssembly::getWasm64Opcode(
+          static_cast<uint16_t>(Inst.getOpcode()));
+      if (Opc64 >= 0) {
+        Inst.setOpcode(Opc64);
+      }
+    }
+    Out.emitInstruction(Inst, getSTI());
     if (CurrentState == EndFunction) {
       onEndOfFunction();
     } else {
@@ -879,6 +903,9 @@ public:
     auto SecName = ".text." + SymName;
     auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
     getStreamer().SwitchSection(WS);
+    // Also generate DWARF for this section if requested.
+    if (getContext().getGenDwarfForAssembly())
+      getContext().addGenDwarfSection(WS);
   }

   void onEndOfFunction() {
@@ -886,7 +913,7 @@ public:
     // user.
     if (!LastFunctionLabel) return;
     auto TempSym = getContext().createLinkerPrivateTempSymbol();
-    getStreamer().EmitLabel(TempSym);
+    getStreamer().emitLabel(TempSym);
     auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
     auto End = MCSymbolRefExpr::create(TempSym, getContext());
    auto Expr =
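The storeName helper introduced in this file exists because MCSymbolWasm keeps import/export names as StringRef, which does not own its bytes; the parser therefore has to keep each underlying std::string alive for the whole session. Below is a minimal standalone sketch of the same ownership pattern, using std::string_view as a stand-in for StringRef (the NamePool name is invented for illustration):

    #include <memory>
    #include <string>
    #include <string_view>
    #include <vector>

    // Owns copies of names so the views handed out stay valid for as long
    // as the pool lives (mirrors the Names vector added above).
    class NamePool {
      std::vector<std::unique_ptr<std::string>> Names;

    public:
      std::string_view store(std::string_view Name) {
        Names.push_back(std::make_unique<std::string>(Name));
        return *Names.back(); // stable: unique_ptr keeps the buffer fixed
      }
    };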
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index a8cb5d18537c4..42fa6d58fffde 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -46,9 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const override;
-  DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
-                             ArrayRef<uint8_t> Bytes, uint64_t Address,
-                             raw_ostream &CStream) const override;
+  Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+                                       ArrayRef<uint8_t> Bytes,
+                                       uint64_t Address,
+                                       raw_ostream &CStream) const override;

 public:
   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -120,29 +121,29 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
   return true;
 }

-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
-    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
-    raw_ostream &CStream) const {
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
+    SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+    uint64_t Address, raw_ostream &CStream) const {
   Size = 0;
   if (Address == 0) {
     // Start of a code section: we're parsing only the function count.
     int64_t FunctionCount;
     if (!nextLEB(FunctionCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None;
     outs() << "        # " << FunctionCount << " functions in section.";
   } else {
     // Parse the start of a single function.
     int64_t BodySize, LocalEntryCount;
     if (!nextLEB(BodySize, Bytes, Size, false) ||
         !nextLEB(LocalEntryCount, Bytes, Size, false))
-      return MCDisassembler::Fail;
+      return None;
     if (LocalEntryCount) {
       outs() << "        .local ";
       for (int64_t I = 0; I < LocalEntryCount; I++) {
         int64_t Count, Type;
         if (!nextLEB(Count, Bytes, Size, false) ||
             !nextLEB(Type, Bytes, Size, false))
-          return MCDisassembler::Fail;
+          return None;
         for (int64_t J = 0; J < Count; J++) {
           if (I || J)
             outs() << ", ";
@@ -198,6 +199,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     case WebAssembly::OPERAND_GLOBAL:
     case WebAssembly::OPERAND_FUNCTION32:
     case WebAssembly::OPERAND_OFFSET32:
+    case WebAssembly::OPERAND_OFFSET64:
     case WebAssembly::OPERAND_P2ALIGN:
     case WebAssembly::OPERAND_TYPEINDEX:
     case WebAssembly::OPERAND_EVENT:
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 8314de41021ff..8ecd7c53621df 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -64,9 +64,6 @@ public:
     return false;
   }

-  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                        MCInst &Res) const override {}
-
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
 };

@@ -80,6 +77,7 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_sleb128_i32", 0, 5 * 8, 0},
       {"fixup_sleb128_i64", 0, 10 * 8, 0},
       {"fixup_uleb128_i32", 0, 5 * 8, 0},
+      {"fixup_uleb128_i64", 0, 10 * 8, 0},
   };

   if (Kind < FirstTargetFixupKind)
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index 33e8de2829552..92708dadd3e00 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -17,6 +17,7 @@ enum Fixups {
   fixup_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
   fixup_sleb128_i64,                        // 64-bit signed
   fixup_uleb128_i32,                        // 32-bit unsigned
+  fixup_uleb128_i64,                        // 64-bit unsigned

   // Marker
   LastTargetFixupKind,
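The new fixup_uleb128_i64 entry is declared as 10 * 8 bits because relocatable LEB128 fields are emitted at their maximum width, so the linker can patch the value in place without resizing the section (5 bytes covers 32 bits, 10 bytes covers 64). A hedged sketch of that padded encoding, independent of the MC layer:

    #include <cstdint>
    #include <vector>

    // Encodes Value as ULEB128, padded with continuation bytes to exactly
    // PadTo bytes so a later relocation can overwrite the field in place.
    void encodePaddedULEB128(uint64_t Value, unsigned PadTo,
                             std::vector<uint8_t> &Out) {
      for (unsigned I = 0; I < PadTo; ++I) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (I != PadTo - 1)
          Byte |= 0x80; // keep the continuation bit set through the padding
        Out.push_back(Byte);
      }
    }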
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index b262e06e55e72..f60b5fcd14ec7 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -54,17 +54,28 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   // Print any additional variadic operands.
   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
   if (Desc.isVariadic()) {
-    if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0)
+    if ((Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) ||
+        Desc.variadicOpsAreDefs())
       OS << "\t";
-    for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) {
-      // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
-      // we have an extra flags operand which is not currently printed, for
-      // compatiblity reasons.
-      if (I != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
-                      MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
-                     I != Desc.getNumOperands()))
+    unsigned Start = Desc.getNumOperands();
+    unsigned NumVariadicDefs = 0;
+    if (Desc.variadicOpsAreDefs()) {
+      // The number of variadic defs is encoded in an immediate by MCInstLower
+      NumVariadicDefs = MI->getOperand(0).getImm();
+      Start = 1;
+    }
+    bool NeedsComma = Desc.getNumOperands() > 0 && !Desc.variadicOpsAreDefs();
+    for (auto I = Start, E = MI->getNumOperands(); I < E; ++I) {
+      if (MI->getOpcode() == WebAssembly::CALL_INDIRECT &&
+          I - Start == NumVariadicDefs) {
+        // Skip type and flags arguments when printing for tests
+        ++I;
+        continue;
+      }
+      if (NeedsComma)
         OS << ", ";
-      printOperand(MI, I, OS);
+      printOperand(MI, I, OS, I - Start < NumVariadicDefs);
+      NeedsComma = true;
     }
   }

@@ -207,20 +218,21 @@ static std::string toString(const APFloat &FP) {
 }

 void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
+                                          raw_ostream &O, bool IsVariadicDef) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
+    const MCInstrDesc &Desc = MII.get(MI->getOpcode());
     unsigned WAReg = Op.getReg();
     if (int(WAReg) >= 0)
       printRegName(O, WAReg);
-    else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
+    else if (OpNo >= Desc.getNumDefs() && !IsVariadicDef)
       O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
     else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
       O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
     else
       O << "$drop";
     // Add a '=' suffix if this is a def.
-    if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
+    if (OpNo < MII.get(MI->getOpcode()).getNumDefs() || IsVariadicDef)
       O << '=';
   } else if (Op.isImm()) {
     O << Op.getImm();
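printOperand renders stackified registers in the $push/$pop notation used by the .s output and tests: a definition prints as $pushN= and a use as $popN, with the new IsVariadicDef flag forcing the def form for variadic results. A toy illustration of the convention, with an invented Operand record rather than MCInst:

    #include <iostream>

    struct Operand {
      int StackId;  // which value-stack slot this operand refers to
      bool IsDef;   // defs push, uses pop
    };

    // Mirrors how the printer renders stackified registers:
    // defs get a '=' suffix, uses appear as $pop.
    void printStackOperand(const Operand &Op, std::ostream &OS) {
      if (Op.IsDef)
        OS << "$push" << Op.StackId << '=';
      else
        OS << "$pop" << Op.StackId;
    }

    int main() {
      printStackOperand({0, true}, std::cout);  // prints $push0=
      std::cout << ' ';
      printStackOperand({0, false}, std::cout); // prints $pop0
      std::cout << '\n';
    }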
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index bee85507f044f..1387a1928b3fb 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -41,7 +41,8 @@ public:
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;

   // Used by tblegen code.
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                    bool IsVariadicDef = false);
   void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
                                       raw_ostream &O);
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 1a4c57e66d2fa..dfed3451e45b1 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -158,6 +158,10 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
       case WebAssembly::OPERAND_EVENT:
         FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
         break;
+      case WebAssembly::OPERAND_OFFSET64:
+        FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64);
+        PaddedSize = 10;
+        break;
       default:
         llvm_unreachable("unexpected symbolic operand kind");
       }
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index b339860a381d0..02b310628ee17 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -24,14 +24,10 @@ namespace llvm {

 class MCAsmBackend;
 class MCCodeEmitter;
-class MCContext;
 class MCInstrInfo;
 class MCObjectTargetWriter;
-class MCSubtargetInfo;
 class MVT;
-class Target;
 class Triple;
-class raw_pwrite_stream;

 MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);

@@ -68,6 +64,8 @@ enum OperandType {
   OPERAND_FUNCTION32,
   /// 32-bit unsigned memory offsets.
   OPERAND_OFFSET32,
+  /// 64-bit unsigned memory offsets.
+  OPERAND_OFFSET64,
   /// p2align immediate for load and store address alignment.
   OPERAND_P2ALIGN,
   /// signature immediate for block/loop.
@@ -149,216 +147,121 @@ wasm::ValType toValType(const MVT &Ty);

 /// Return the default p2align value for a load or store with the given opcode.
 inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   switch (Opc) {
-  case WebAssembly::LOAD8_S_I32:
-  case WebAssembly::LOAD8_S_I32_S:
-  case WebAssembly::LOAD8_U_I32:
-  case WebAssembly::LOAD8_U_I32_S:
-  case WebAssembly::LOAD8_S_I64:
-  case WebAssembly::LOAD8_S_I64_S:
-  case WebAssembly::LOAD8_U_I64:
-  case WebAssembly::LOAD8_U_I64_S:
-  case WebAssembly::ATOMIC_LOAD8_U_I32:
-  case WebAssembly::ATOMIC_LOAD8_U_I32_S:
-  case WebAssembly::ATOMIC_LOAD8_U_I64:
-  case WebAssembly::ATOMIC_LOAD8_U_I64_S:
-  case WebAssembly::STORE8_I32:
-  case WebAssembly::STORE8_I32_S:
-  case WebAssembly::STORE8_I64:
-  case WebAssembly::STORE8_I64_S:
-  case WebAssembly::ATOMIC_STORE8_I32:
-  case WebAssembly::ATOMIC_STORE8_I32_S:
-  case WebAssembly::ATOMIC_STORE8_I64:
-  case WebAssembly::ATOMIC_STORE8_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
-  case WebAssembly::ATOMIC_RMW8_U_ADD_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
-  case WebAssembly::ATOMIC_RMW8_U_ADD_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
-  case WebAssembly::ATOMIC_RMW8_U_SUB_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
-  case WebAssembly::ATOMIC_RMW8_U_SUB_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_AND_I32:
-  case WebAssembly::ATOMIC_RMW8_U_AND_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_AND_I64:
-  case WebAssembly::ATOMIC_RMW8_U_AND_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_OR_I32:
-  case WebAssembly::ATOMIC_RMW8_U_OR_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_OR_I64:
-  case WebAssembly::ATOMIC_RMW8_U_OR_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
-  case WebAssembly::ATOMIC_RMW8_U_XOR_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
-  case WebAssembly::ATOMIC_RMW8_U_XOR_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
-  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
-  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
-  case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
-  case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
-  case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
-  case WebAssembly::LOAD_SPLAT_v8x16:
-  case WebAssembly::LOAD_SPLAT_v8x16_S:
+#define WASM_LOAD_STORE(NAME) \
+  case WebAssembly::NAME##_A32: \
+  case WebAssembly::NAME##_A64: \
+  case WebAssembly::NAME##_A32_S: \
+  case WebAssembly::NAME##_A64_S:
+  WASM_LOAD_STORE(LOAD8_S_I32)
+  WASM_LOAD_STORE(LOAD8_U_I32)
+  WASM_LOAD_STORE(LOAD8_S_I64)
+  WASM_LOAD_STORE(LOAD8_U_I64)
+  WASM_LOAD_STORE(ATOMIC_LOAD8_U_I32)
+  WASM_LOAD_STORE(ATOMIC_LOAD8_U_I64)
+  WASM_LOAD_STORE(STORE8_I32)
+  WASM_LOAD_STORE(STORE8_I64)
+  WASM_LOAD_STORE(ATOMIC_STORE8_I32)
+  WASM_LOAD_STORE(ATOMIC_STORE8_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_ADD_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_ADD_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_SUB_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_SUB_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_AND_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_AND_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_OR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_OR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_XOR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_XOR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64)
+  WASM_LOAD_STORE(LOAD_SPLAT_v8x16)
     return 0;
-  case WebAssembly::LOAD16_S_I32:
-  case WebAssembly::LOAD16_S_I32_S:
-  case WebAssembly::LOAD16_U_I32:
-  case WebAssembly::LOAD16_U_I32_S:
-  case WebAssembly::LOAD16_S_I64:
-  case WebAssembly::LOAD16_S_I64_S:
-  case WebAssembly::LOAD16_U_I64:
-  case WebAssembly::LOAD16_U_I64_S:
-  case WebAssembly::ATOMIC_LOAD16_U_I32:
-  case WebAssembly::ATOMIC_LOAD16_U_I32_S:
-  case WebAssembly::ATOMIC_LOAD16_U_I64:
-  case WebAssembly::ATOMIC_LOAD16_U_I64_S:
-  case WebAssembly::STORE16_I32:
-  case WebAssembly::STORE16_I32_S:
-  case WebAssembly::STORE16_I64:
-  case WebAssembly::STORE16_I64_S:
-  case WebAssembly::ATOMIC_STORE16_I32:
-  case WebAssembly::ATOMIC_STORE16_I32_S:
-  case WebAssembly::ATOMIC_STORE16_I64:
-  case WebAssembly::ATOMIC_STORE16_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
-  case WebAssembly::ATOMIC_RMW16_U_ADD_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
-  case WebAssembly::ATOMIC_RMW16_U_ADD_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
-  case WebAssembly::ATOMIC_RMW16_U_SUB_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
-  case WebAssembly::ATOMIC_RMW16_U_SUB_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_AND_I32:
-  case WebAssembly::ATOMIC_RMW16_U_AND_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_AND_I64:
-  case WebAssembly::ATOMIC_RMW16_U_AND_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_OR_I32:
-  case WebAssembly::ATOMIC_RMW16_U_OR_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_OR_I64:
-  case WebAssembly::ATOMIC_RMW16_U_OR_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
-  case WebAssembly::ATOMIC_RMW16_U_XOR_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
-  case WebAssembly::ATOMIC_RMW16_U_XOR_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
-  case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
-  case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
-  case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
-  case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
-  case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
-  case WebAssembly::LOAD_SPLAT_v16x8:
-  case WebAssembly::LOAD_SPLAT_v16x8_S:
+  WASM_LOAD_STORE(LOAD16_S_I32)
+  WASM_LOAD_STORE(LOAD16_U_I32)
+  WASM_LOAD_STORE(LOAD16_S_I64)
+  WASM_LOAD_STORE(LOAD16_U_I64)
+  WASM_LOAD_STORE(ATOMIC_LOAD16_U_I32)
+  WASM_LOAD_STORE(ATOMIC_LOAD16_U_I64)
+  WASM_LOAD_STORE(STORE16_I32)
+  WASM_LOAD_STORE(STORE16_I64)
+  WASM_LOAD_STORE(ATOMIC_STORE16_I32)
+  WASM_LOAD_STORE(ATOMIC_STORE16_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_ADD_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_ADD_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_SUB_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_SUB_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_AND_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_AND_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_OR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_OR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_XOR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_XOR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64)
+  WASM_LOAD_STORE(LOAD_SPLAT_v16x8)
     return 1;
-  case WebAssembly::LOAD_I32:
-  case WebAssembly::LOAD_I32_S:
-  case WebAssembly::LOAD_F32:
-  case WebAssembly::LOAD_F32_S:
-  case WebAssembly::STORE_I32:
-  case WebAssembly::STORE_I32_S:
-  case WebAssembly::STORE_F32:
-  case WebAssembly::STORE_F32_S:
-  case WebAssembly::LOAD32_S_I64:
-  case WebAssembly::LOAD32_S_I64_S:
-  case WebAssembly::LOAD32_U_I64:
-  case WebAssembly::LOAD32_U_I64_S:
-  case WebAssembly::STORE32_I64:
-  case WebAssembly::STORE32_I64_S:
-  case WebAssembly::ATOMIC_LOAD_I32:
-  case WebAssembly::ATOMIC_LOAD_I32_S:
-  case WebAssembly::ATOMIC_LOAD32_U_I64:
-  case WebAssembly::ATOMIC_LOAD32_U_I64_S:
-  case WebAssembly::ATOMIC_STORE_I32:
-  case WebAssembly::ATOMIC_STORE_I32_S:
-  case WebAssembly::ATOMIC_STORE32_I64:
-  case WebAssembly::ATOMIC_STORE32_I64_S:
-  case WebAssembly::ATOMIC_RMW_ADD_I32:
-  case WebAssembly::ATOMIC_RMW_ADD_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
-  case WebAssembly::ATOMIC_RMW32_U_ADD_I64_S:
-  case WebAssembly::ATOMIC_RMW_SUB_I32:
-  case WebAssembly::ATOMIC_RMW_SUB_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
-  case WebAssembly::ATOMIC_RMW32_U_SUB_I64_S:
-  case WebAssembly::ATOMIC_RMW_AND_I32:
-  case WebAssembly::ATOMIC_RMW_AND_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_AND_I64:
-  case WebAssembly::ATOMIC_RMW32_U_AND_I64_S:
-  case WebAssembly::ATOMIC_RMW_OR_I32:
-  case WebAssembly::ATOMIC_RMW_OR_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_OR_I64:
-  case WebAssembly::ATOMIC_RMW32_U_OR_I64_S:
-  case WebAssembly::ATOMIC_RMW_XOR_I32:
-  case WebAssembly::ATOMIC_RMW_XOR_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
-  case WebAssembly::ATOMIC_RMW32_U_XOR_I64_S:
-  case WebAssembly::ATOMIC_RMW_XCHG_I32:
-  case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
-  case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
-  case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
-  case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S:
-  case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
-  case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S:
-  case WebAssembly::ATOMIC_NOTIFY:
-  case WebAssembly::ATOMIC_NOTIFY_S:
-  case WebAssembly::ATOMIC_WAIT_I32:
-  case WebAssembly::ATOMIC_WAIT_I32_S:
-  case WebAssembly::LOAD_SPLAT_v32x4:
-  case WebAssembly::LOAD_SPLAT_v32x4_S:
+  WASM_LOAD_STORE(LOAD_I32)
+  WASM_LOAD_STORE(LOAD_F32)
+  WASM_LOAD_STORE(STORE_I32)
+  WASM_LOAD_STORE(STORE_F32)
+  WASM_LOAD_STORE(LOAD32_S_I64)
+  WASM_LOAD_STORE(LOAD32_U_I64)
+  WASM_LOAD_STORE(STORE32_I64)
+  WASM_LOAD_STORE(ATOMIC_LOAD_I32)
+  WASM_LOAD_STORE(ATOMIC_LOAD32_U_I64)
+  WASM_LOAD_STORE(ATOMIC_STORE_I32)
+  WASM_LOAD_STORE(ATOMIC_STORE32_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_ADD_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_ADD_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_SUB_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_SUB_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_AND_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_AND_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_OR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_OR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_XOR_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_XOR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32)
+  WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_NOTIFY)
+  WASM_LOAD_STORE(ATOMIC_WAIT_I32)
+  WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
     return 2;
-  case WebAssembly::LOAD_I64:
-  case WebAssembly::LOAD_I64_S:
-  case WebAssembly::LOAD_F64:
-  case WebAssembly::LOAD_F64_S:
-  case WebAssembly::STORE_I64:
-  case WebAssembly::STORE_I64_S:
-  case WebAssembly::STORE_F64:
-  case WebAssembly::STORE_F64_S:
-  case WebAssembly::ATOMIC_LOAD_I64:
-  case WebAssembly::ATOMIC_LOAD_I64_S:
-  case WebAssembly::ATOMIC_STORE_I64:
-  case WebAssembly::ATOMIC_STORE_I64_S:
-  case WebAssembly::ATOMIC_RMW_ADD_I64:
-  case WebAssembly::ATOMIC_RMW_ADD_I64_S:
-  case WebAssembly::ATOMIC_RMW_SUB_I64:
-  case WebAssembly::ATOMIC_RMW_SUB_I64_S:
-  case WebAssembly::ATOMIC_RMW_AND_I64:
-  case WebAssembly::ATOMIC_RMW_AND_I64_S:
-  case WebAssembly::ATOMIC_RMW_OR_I64:
-  case WebAssembly::ATOMIC_RMW_OR_I64_S:
-  case WebAssembly::ATOMIC_RMW_XOR_I64:
-  case WebAssembly::ATOMIC_RMW_XOR_I64_S:
-  case WebAssembly::ATOMIC_RMW_XCHG_I64:
-  case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
-  case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
-  case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
-  case WebAssembly::ATOMIC_WAIT_I64:
-  case WebAssembly::ATOMIC_WAIT_I64_S:
-  case WebAssembly::LOAD_SPLAT_v64x2:
-  case WebAssembly::LOAD_SPLAT_v64x2_S:
-  case WebAssembly::LOAD_EXTEND_S_v8i16:
-  case WebAssembly::LOAD_EXTEND_S_v8i16_S:
-  case WebAssembly::LOAD_EXTEND_U_v8i16:
-  case WebAssembly::LOAD_EXTEND_U_v8i16_S:
-  case WebAssembly::LOAD_EXTEND_S_v4i32:
-  case WebAssembly::LOAD_EXTEND_S_v4i32_S:
-  case WebAssembly::LOAD_EXTEND_U_v4i32:
-  case WebAssembly::LOAD_EXTEND_U_v4i32_S:
-  case WebAssembly::LOAD_EXTEND_S_v2i64:
-  case WebAssembly::LOAD_EXTEND_S_v2i64_S:
-  case WebAssembly::LOAD_EXTEND_U_v2i64:
-  case WebAssembly::LOAD_EXTEND_U_v2i64_S:
+  WASM_LOAD_STORE(LOAD_I64)
+  WASM_LOAD_STORE(LOAD_F64)
+  WASM_LOAD_STORE(STORE_I64)
+  WASM_LOAD_STORE(STORE_F64)
+  WASM_LOAD_STORE(ATOMIC_LOAD_I64)
+  WASM_LOAD_STORE(ATOMIC_STORE_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_ADD_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_SUB_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_AND_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_OR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64)
+  WASM_LOAD_STORE(ATOMIC_WAIT_I64)
+  WASM_LOAD_STORE(LOAD_SPLAT_v64x2)
+  WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16)
+  WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16)
+  WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32)
+  WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)
+  WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
+  WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
     return 3;
-  case WebAssembly::LOAD_V128:
-  case WebAssembly::LOAD_V128_S:
-  case WebAssembly::STORE_V128:
-  case WebAssembly::STORE_V128_S:
+  WASM_LOAD_STORE(LOAD_V128)
+  WASM_LOAD_STORE(STORE_V128)
     return 4;
   default:
     return -1;
   }
+#undef WASM_LOAD_STORE
 }

 inline unsigned GetDefaultP2Align(unsigned Opc) {
@@ -441,30 +344,8 @@ inline bool isTee(unsigned Opc) {

 inline bool isCallDirect(unsigned Opc) {
   switch (Opc) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_i32:
-  case WebAssembly::CALL_i32_S:
-  case WebAssembly::CALL_i64:
-  case WebAssembly::CALL_i64_S:
-  case WebAssembly::CALL_f32:
-  case WebAssembly::CALL_f32_S:
-  case WebAssembly::CALL_f64:
-  case WebAssembly::CALL_f64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_exnref:
-  case WebAssembly::CALL_exnref_S:
+  case WebAssembly::CALL:
+  case WebAssembly::CALL_S:
   case WebAssembly::RET_CALL:
   case WebAssembly::RET_CALL_S:
     return true;
@@ -475,30 +356,8 @@ inline bool isCallDirect(unsigned Opc) {

 inline bool isCallIndirect(unsigned Opc) {
   switch (Opc) {
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-  case WebAssembly::CALL_INDIRECT_i32:
-  case WebAssembly::CALL_INDIRECT_i32_S:
-  case WebAssembly::CALL_INDIRECT_i64:
-  case WebAssembly::CALL_INDIRECT_i64_S:
-  case WebAssembly::CALL_INDIRECT_f32:
-  case WebAssembly::CALL_INDIRECT_f32_S:
-  case WebAssembly::CALL_INDIRECT_f64:
-  case WebAssembly::CALL_INDIRECT_f64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_exnref:
-  case WebAssembly::CALL_INDIRECT_exnref_S:
+  case WebAssembly::CALL_INDIRECT:
+  case WebAssembly::CALL_INDIRECT_S:
   case WebAssembly::RET_CALL_INDIRECT:
   case WebAssembly::RET_CALL_INDIRECT_S:
     return true;
@@ -507,66 +366,15 @@ inline bool isCallIndirect(unsigned Opc) {
   }
 }

-/// Returns the operand number of a callee, assuming the argument is a call
-/// instruction.
-inline unsigned getCalleeOpNo(unsigned Opc) {
-  switch (Opc) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-  case WebAssembly::RET_CALL:
-  case WebAssembly::RET_CALL_S:
-  case WebAssembly::RET_CALL_INDIRECT:
-  case WebAssembly::RET_CALL_INDIRECT_S:
-    return 0;
-  case WebAssembly::CALL_i32:
-  case WebAssembly::CALL_i32_S:
-  case WebAssembly::CALL_i64:
-  case WebAssembly::CALL_i64_S:
-  case WebAssembly::CALL_f32:
-  case WebAssembly::CALL_f32_S:
-  case WebAssembly::CALL_f64:
-  case WebAssembly::CALL_f64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_exnref:
-  case WebAssembly::CALL_exnref_S:
-  case WebAssembly::CALL_INDIRECT_i32:
-  case WebAssembly::CALL_INDIRECT_i32_S:
-  case WebAssembly::CALL_INDIRECT_i64:
-  case WebAssembly::CALL_INDIRECT_i64_S:
-  case WebAssembly::CALL_INDIRECT_f32:
-  case WebAssembly::CALL_INDIRECT_f32_S:
-  case WebAssembly::CALL_INDIRECT_f64:
-  case WebAssembly::CALL_INDIRECT_f64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_exnref:
-  case WebAssembly::CALL_INDIRECT_exnref_S:
-    return 1;
+inline bool isBrTable(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case WebAssembly::BR_TABLE_I32:
+  case WebAssembly::BR_TABLE_I32_S:
+  case WebAssembly::BR_TABLE_I64:
+  case WebAssembly::BR_TABLE_I64_S:
+    return true;
   default:
-    llvm_unreachable("Not a call instruction");
+    return false;
   }
 }
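The WASM_LOAD_STORE macro above is a classic X-macro: one invocation expands to the four case labels for an instruction's _A32/_A64 register and stack variants, which is what lets this switch shrink by several hundred lines. A self-contained sketch of the trick with made-up opcode names:

    #include <cstdio>

    enum Opcode { FOO_A32, FOO_A64, FOO_A32_S, FOO_A64_S, BAR };

    // One macro expands to all address-width/stack variants of an opcode.
    #define LOAD_STORE_CASES(NAME)                                        \
      case NAME##_A32:                                                    \
      case NAME##_A64:                                                    \
      case NAME##_A32_S:                                                  \
      case NAME##_A64_S:

    bool isLoadStore(Opcode Opc) {
      switch (Opc) {
        LOAD_STORE_CASES(FOO)
        return true;
      default:
        return false;
      }
    }
    #undef LOAD_STORE_CASES

    int main() { std::printf("%d\n", isLoadStore(FOO_A64_S)); } // prints 1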
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 7c21ed5f974e2..e954eeaebb141 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -28,7 +28,7 @@ WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
     : MCTargetStreamer(S) {}

 void WebAssemblyTargetStreamer::emitValueType(wasm::ValType Type) {
-  Streamer.EmitIntValue(uint8_t(Type), 1);
+  Streamer.emitIntValue(uint8_t(Type), 1);
 }

 WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
@@ -113,9 +113,9 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
       ++Grouped.back().second;
   }

-  Streamer.EmitULEB128IntValue(Grouped.size());
+  Streamer.emitULEB128IntValue(Grouped.size());
   for (auto Pair : Grouped) {
-    Streamer.EmitULEB128IntValue(Pair.second);
+    Streamer.emitULEB128IntValue(Pair.second);
     emitValueType(Pair.first);
   }
 }
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 9aee1a06c9562..d6fba05c9986f 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -21,7 +21,6 @@

 namespace llvm {

-class MCWasmStreamer;
 class MCSymbolWasm;

 /// WebAssembly-specific streamer interface, to implement support
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index e7a599e3e1750..779e921c1d949 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -78,7 +78,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
       return wasm::R_WASM_TABLE_INDEX_REL_SLEB;
     case MCSymbolRefExpr::VK_WASM_MBREL:
       assert(SymA.isData());
-      return wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
+      return is64Bit() ? wasm::R_WASM_MEMORY_ADDR_REL_SLEB64
+                       : wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
     case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
       return wasm::R_WASM_TYPE_INDEX_LEB;
     default:
@@ -91,7 +92,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
       return wasm::R_WASM_TABLE_INDEX_SLEB;
     return wasm::R_WASM_MEMORY_ADDR_SLEB;
   case WebAssembly::fixup_sleb128_i64:
-    llvm_unreachable("fixup_sleb128_i64 not implemented yet");
+    assert(SymA.isData());
+    return wasm::R_WASM_MEMORY_ADDR_SLEB64;
   case WebAssembly::fixup_uleb128_i32:
     if (SymA.isGlobal())
       return wasm::R_WASM_GLOBAL_INDEX_LEB;
@@ -100,9 +102,14 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
     if (SymA.isEvent())
       return wasm::R_WASM_EVENT_INDEX_LEB;
     return wasm::R_WASM_MEMORY_ADDR_LEB;
+  case WebAssembly::fixup_uleb128_i64:
+    assert(SymA.isData());
+    return wasm::R_WASM_MEMORY_ADDR_LEB64;
   case FK_Data_4:
     if (SymA.isFunction())
       return wasm::R_WASM_TABLE_INDEX_I32;
+    if (SymA.isGlobal())
+      return wasm::R_WASM_GLOBAL_INDEX_I32;
     if (auto Section = static_cast<const MCSectionWasm *>(
             getFixupSection(Fixup.getValue()))) {
       if (Section->getKind().isText())
@@ -111,6 +118,9 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
         return wasm::R_WASM_SECTION_OFFSET_I32;
     }
     return wasm::R_WASM_MEMORY_ADDR_I32;
+  case FK_Data_8:
+    assert(SymA.isData());
+    return wasm::R_WASM_MEMORY_ADDR_I64;
   default:
     llvm_unreachable("unimplemented fixup kind");
   }
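getRelocType now picks 64-bit relocation variants when targeting wasm64, since the object writer must match the 10-byte LEB fields emitted for 64-bit addresses. A condensed sketch of that dispatch shape; the enum values here are illustrative stand-ins, not the real wasm::R_WASM_* constants:

    #include <cstdint>

    enum RelocType : uint32_t {
      MEMORY_ADDR_LEB = 3,     // stand-in for R_WASM_MEMORY_ADDR_LEB
      MEMORY_ADDR_LEB64 = 20,  // stand-in for R_WASM_MEMORY_ADDR_LEB64
    };

    enum FixupKind { Uleb128_i32, Uleb128_i64 };

    // Picks the relocation for a data-address fixup; wasm64 objects need
    // the 64-bit LEB relocation so the linker patches 10-byte fields.
    RelocType relocForDataFixup(FixupKind Kind) {
      return Kind == Uleb128_i64 ? MEMORY_ADDR_LEB64 : MEMORY_ADDR_LEB;
    }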
diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index 87317f8a7f1e5..f9a96819905f3 100644
--- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -32,3 +32,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetInfo() {
   RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64",
                                    "WebAssembly 64-bit", "WebAssembly");
 }
+
+// Defines llvm::WebAssembly::getWasm64Opcode llvm::WebAssembly::getStackOpcode
+// which have to be in a shared location between CodeGen and MC.
+#define GET_INSTRMAP_INFO 1
+#define GET_INSTRINFO_ENUM 1
+#include "WebAssemblyGenInstrInfo.inc"
diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
index a7427f78c72c3..be7a632331c8e 100644
--- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
+++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
@@ -21,6 +21,13 @@ class Target;
 Target &getTheWebAssemblyTarget32();
 Target &getTheWebAssemblyTarget64();

+namespace WebAssembly {
+
+int getStackOpcode(unsigned short Opcode);
+int getWasm64Opcode(unsigned short Opcode);
+
+} // namespace WebAssembly
+
 } // end namespace llvm

 #endif // LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
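getStackOpcode and getWasm64Opcode are TableGen-generated instruction mappings pulled in through GET_INSTRMAP_INFO; callers such as the assembler's wasm64 upgrade path look up a counterpart opcode and treat -1 as "no mapping". A hand-written sketch of that lookup contract, with invented opcodes standing in for the generated table:

    #include <cstdint>

    // Hypothetical opcode pair; the real table is TableGen-generated.
    enum : uint16_t { LOAD_I32_A32 = 10, LOAD_I32_A64 = 11 };

    // Returns the 64-bit-address twin of an opcode, or -1 if none exists,
    // matching the convention of WebAssembly::getWasm64Opcode.
    int getWasm64OpcodeSketch(uint16_t Opc) {
      switch (Opc) {
      case LOAD_I32_A32:
        return LOAD_I32_A64;
      default:
        return -1;
      }
    }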
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h
index fcd48e0096b61..9ce02f7731e08 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -44,6 +44,7 @@ FunctionPass *createWebAssemblyOptimizeLiveIntervals();
 FunctionPass *createWebAssemblyMemIntrinsicResults();
 FunctionPass *createWebAssemblyRegStackify();
 FunctionPass *createWebAssemblyRegColoring();
+FunctionPass *createWebAssemblyFixBrTableDefaults();
 FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
 FunctionPass *createWebAssemblyLateEHPrepare();
 FunctionPass *createWebAssemblyCFGSort();
@@ -51,8 +52,8 @@ FunctionPass *createWebAssemblyCFGStackify();
 FunctionPass *createWebAssemblyExplicitLocals();
 FunctionPass *createWebAssemblyLowerBrUnless();
 FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyDebugFixup();
 FunctionPass *createWebAssemblyPeephole();
-FunctionPass *createWebAssemblyCallIndirectFixup();

 // PassRegistry initialization declarations.
 void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &);
@@ -68,6 +69,7 @@ void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
 void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
 void initializeWebAssemblyRegStackifyPass(PassRegistry &);
 void initializeWebAssemblyRegColoringPass(PassRegistry &);
+void initializeWebAssemblyFixBrTableDefaultsPass(PassRegistry &);
 void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &);
 void initializeWebAssemblyLateEHPreparePass(PassRegistry &);
 void initializeWebAssemblyExceptionInfoPass(PassRegistry &);
@@ -76,11 +78,20 @@ void initializeWebAssemblyCFGStackifyPass(PassRegistry &);
 void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
 void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &);
 void initializeWebAssemblyRegNumberingPass(PassRegistry &);
+void initializeWebAssemblyDebugFixupPass(PassRegistry &);
 void initializeWebAssemblyPeepholePass(PassRegistry &);
-void initializeWebAssemblyCallIndirectFixupPass(PassRegistry &);

 namespace WebAssembly {
-enum TargetIndex { TI_LOCAL_START, TI_GLOBAL_START, TI_OPERAND_STACK_START };
+enum TargetIndex {
+  // Followed by a local index (ULEB).
+  TI_LOCAL,
+  // Followed by an absolute global index (ULEB). DEPRECATED.
+  TI_GLOBAL_FIXED,
+  TI_OPERAND_STACK,
+  // Followed by a compilation unit relative global index (uint32_t)
+  // that will have an associated relocation.
+  TI_GLOBAL_RELOC
+};
 } // end namespace WebAssembly

 } // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index b0b8a9b996a37..2c18bf2c3abea 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -66,6 +66,10 @@ def FeatureMutableGlobals :
     SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true",
                      "Enable mutable globals">;

+def FeatureReferenceTypes :
+    SubtargetFeature<"reference-types", "HasReferenceTypes", "true",
+                     "Enable reference types">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //===----------------------------------------------------------------------===//
@@ -98,7 +102,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
 def : ProcessorModel<"bleeding-edge", NoSchedModel,
                      [FeatureSIMD128, FeatureAtomics,
                       FeatureNontrappingFPToInt, FeatureSignExt,
-                      FeatureMutableGlobals]>;
+                      FeatureMutableGlobals, FeatureBulkMemory,
+                      FeatureTailCall]>;

 //===----------------------------------------------------------------------===//
 // Target Declaration
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index b7a701f157829..530a55cda0e5a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -132,7 +132,7 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
   for (auto &Pair : Replacements) {
     Function *OldF = Pair.first;
     Function *NewF = Pair.second;
-    std::string Name = OldF->getName();
+    std::string Name = std::string(OldF->getName());
     M.getFunctionList().push_back(NewF);
     OldF->replaceAllUsesWith(
         ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewF, OldF->getType()));
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index adcb24b4be534..96fa13d307290 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -85,7 +85,7 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
 // WebAssemblyAsmPrinter Implementation.
 //===----------------------------------------------------------------------===//

-void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
   for (auto &It : OutContext.getSymbols()) {
     // Emit a .globaltype and .eventtype declaration.
     auto Sym = cast<MCSymbolWasm>(It.getValue());
@@ -103,7 +103,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
     if (F.isDeclarationForLinker()) {
       SmallVector<MVT, 4> Results;
       SmallVector<MVT, 4> Params;
-      computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+      computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
       auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
       Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
       if (!Sym->getSignature()) {
@@ -122,14 +122,14 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
           F.hasFnAttribute("wasm-import-module")) {
         StringRef Name =
             F.getFnAttribute("wasm-import-module").getValueAsString();
-        Sym->setImportModule(Name);
+        Sym->setImportModule(storeName(Name));
         getTargetStreamer()->emitImportModule(Sym, Name);
       }
       if (TM.getTargetTriple().isOSBinFormatWasm() &&
           F.hasFnAttribute("wasm-import-name")) {
         StringRef Name =
             F.getFnAttribute("wasm-import-name").getValueAsString();
-        Sym->setImportName(Name);
+        Sym->setImportName(storeName(Name));
         getTargetStreamer()->emitImportName(Sym, Name);
       }
     }
@@ -137,7 +137,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
     if (F.hasFnAttribute("wasm-export-name")) {
       auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
       StringRef Name = F.getFnAttribute("wasm-export-name").getValueAsString();
-      Sym->setExportName(Name);
+      Sym->setExportName(storeName(Name));
       getTargetStreamer()->emitExportName(Sym, Name);
     }
   }
@@ -167,7 +167,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
       MCSectionWasm *MySection =
           OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
       OutStreamer->SwitchSection(MySection);
-      OutStreamer->EmitBytes(Contents->getString());
+      OutStreamer->emitBytes(Contents->getString());
       OutStreamer->PopSection();
     }
   }
@@ -208,19 +208,19 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
       ".custom_section.producers", SectionKind::getMetadata());
   OutStreamer->PushSection();
   OutStreamer->SwitchSection(Producers);
-  OutStreamer->EmitULEB128IntValue(FieldCount);
+  OutStreamer->emitULEB128IntValue(FieldCount);
   for (auto &Producers : {std::make_pair("language", &Languages),
                           std::make_pair("processed-by", &Tools)}) {
     if (Producers.second->empty())
       continue;
-    OutStreamer->EmitULEB128IntValue(strlen(Producers.first));
-    OutStreamer->EmitBytes(Producers.first);
-    OutStreamer->EmitULEB128IntValue(Producers.second->size());
+    OutStreamer->emitULEB128IntValue(strlen(Producers.first));
+    OutStreamer->emitBytes(Producers.first);
+    OutStreamer->emitULEB128IntValue(Producers.second->size());
     for (auto &Producer : *Producers.second) {
-      OutStreamer->EmitULEB128IntValue(Producer.first.size());
-      OutStreamer->EmitBytes(Producer.first);
-      OutStreamer->EmitULEB128IntValue(Producer.second.size());
-      OutStreamer->EmitBytes(Producer.second);
+      OutStreamer->emitULEB128IntValue(Producer.first.size());
+      OutStreamer->emitBytes(Producer.first);
+      OutStreamer->emitULEB128IntValue(Producer.second.size());
+      OutStreamer->emitBytes(Producer.second);
     }
   }
   OutStreamer->PopSection();
@@ -230,20 +230,20 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
 void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
   struct FeatureEntry {
     uint8_t Prefix;
-    StringRef Name;
+    std::string Name;
   };

   // Read target features and linkage policies from module metadata
   SmallVector<FeatureEntry, 4> EmittedFeatures;
-  for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
-    std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+  auto EmitFeature = [&](std::string Feature) {
+    std::string MDKey = (StringRef("wasm-feature-") + Feature).str();
     Metadata *Policy = M.getModuleFlag(MDKey);
     if (Policy == nullptr)
-      continue;
+      return;

     FeatureEntry Entry;
     Entry.Prefix = 0;
-    Entry.Name = KV.Key;
+    Entry.Name = Feature;

     if (auto *MD = cast<ConstantAsMetadata>(Policy))
       if (auto *I = cast<ConstantInt>(MD->getValue()))
@@ -253,10 +253,16 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
     if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
         Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
         Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
-      continue;
+      return;

     EmittedFeatures.push_back(Entry);
+  };
+
+  for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+    EmitFeature(KV.Key);
   }
+  // This pseudo-feature tells the linker whether shared memory would be safe
+  EmitFeature("shared-mem");

   if (EmittedFeatures.size() == 0)
     return;
@@ -267,30 +273,31 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
   OutStreamer->PushSection();
   OutStreamer->SwitchSection(FeaturesSection);

-  OutStreamer->EmitULEB128IntValue(EmittedFeatures.size());
+  OutStreamer->emitULEB128IntValue(EmittedFeatures.size());
   for (auto &F : EmittedFeatures) {
-    OutStreamer->EmitIntValue(F.Prefix, 1);
-    OutStreamer->EmitULEB128IntValue(F.Name.size());
-    OutStreamer->EmitBytes(F.Name);
+    OutStreamer->emitIntValue(F.Prefix, 1);
+    OutStreamer->emitULEB128IntValue(F.Name.size());
+    OutStreamer->emitBytes(F.Name);
   }

   OutStreamer->PopSection();
 }

-void WebAssemblyAsmPrinter::EmitConstantPool() {
+void WebAssemblyAsmPrinter::emitConstantPool() {
   assert(MF->getConstantPool()->getConstants().empty() &&
          "WebAssembly disables constant pools");
 }

-void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
+void WebAssemblyAsmPrinter::emitJumpTableInfo() {
   // Nothing to do; jump tables are incorporated into the instruction stream.
 }

-void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
+void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
   const Function &F = MF->getFunction();
   SmallVector<MVT, 1> ResultVTs;
   SmallVector<MVT, 4> ParamVTs;
-  computeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+  computeSignatureVTs(F.getFunctionType(), &F, F, TM, ParamVTs, ResultVTs);
+
   auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
   auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
   WasmSym->setSignature(Signature.get());
@@ -312,10 +319,10 @@ void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
   valTypesFromMVTs(MFI->getLocals(), Locals);
   getTargetStreamer()->emitLocal(Locals);

-  AsmPrinter::EmitFunctionBodyStart();
+  AsmPrinter::emitFunctionBodyStart();
 }

-void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
   LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');

   switch (MI->getOpcode()) {
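EmitProducerInfo serializes the tool-chain "producers" custom section: a ULEB128 field count, then for each field a length-prefixed name, a ULEB128 entry count, and length-prefixed name/version string pairs. A compact sketch of that wire format against a plain byte buffer (assumed here in place of MCStreamer; the sample strings are illustrative):

    #include <cstdint>
    #include <string>
    #include <vector>

    static void uleb(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t B = V & 0x7f;
        V >>= 7;
        if (V)
          B |= 0x80;
        Out.push_back(B);
      } while (V);
    }

    static void str(std::vector<uint8_t> &Out, const std::string &S) {
      uleb(Out, S.size()); // every string is length-prefixed
      Out.insert(Out.end(), S.begin(), S.end());
    }

    // Builds one "processed-by" field with a single producer entry,
    // mirroring the ULEB-framed layout the AsmPrinter writes.
    std::vector<uint8_t> producersPayload() {
      std::vector<uint8_t> Out;
      uleb(Out, 1);             // field count
      str(Out, "processed-by"); // field name
      uleb(Out, 1);             // number of (name, version) pairs
      str(Out, "llvm");         // producer name
      str(Out, "11.0.0");       // producer version
      return Out;
    }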
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 4e55c81dec38a..d9281568638d7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -16,9 +16,7 @@
 #include "llvm/Target/TargetMachine.h"

 namespace llvm {
-class MCSymbol;
 class WebAssemblyTargetStreamer;
-class WebAssemblyMCInstLower;

 class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
   const WebAssemblySubtarget *Subtarget;
@@ -26,6 +24,13 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
   WebAssemblyFunctionInfo *MFI;
   // TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
   std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+  std::vector<std::unique_ptr<std::string>> Names;
+
+  StringRef storeName(StringRef Name) {
+    std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
+    Names.push_back(std::move(N));
+    return *Names.back();
+  }

 public:
   explicit WebAssemblyAsmPrinter(TargetMachine &TM,
@@ -57,13 +62,13 @@ public:
   // AsmPrinter Implementation.
   //===------------------------------------------------------------------===//

-  void EmitEndOfAsmFile(Module &M) override;
+  void emitEndOfAsmFile(Module &M) override;
   void EmitProducerInfo(Module &M);
   void EmitTargetFeatures(Module &M);
-  void EmitJumpTableInfo() override;
-  void EmitConstantPool() override;
-  void EmitFunctionBodyStart() override;
-  void EmitInstruction(const MachineInstr *MI) override;
+  void emitJumpTableInfo() override;
+  void emitConstantPool() override;
+  void emitFunctionBodyStart() override;
+  void emitInstruction(const MachineInstr *MI) override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index c069af9eed623..8442b49e25f45 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -79,7 +79,6 @@ template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
 class RegionInfo {
   const MachineLoopInfo &MLI;
   const WebAssemblyExceptionInfo &WEI;
-  std::vector<const Region *> Regions;
   DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
   DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;

@@ -93,7 +92,14 @@ public:
     const auto *WE = WEI.getExceptionFor(MBB);
     if (!ML && !WE)
       return nullptr;
-    if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+    // We determine subregion relationship by domination of their headers,
+    // i.e., if region A's header dominates region B's header, B is a
+    // subregion of A. WebAssemblyException contains BBs in all its subregions
+    // (loops or exceptions), but MachineLoop may not, because MachineLoop
+    // does not contain BBs that don't have a path to its header even if they
+    // are dominated by its header. So here we should use
+    // WE->contains(ML->getHeader()), but not ML->contains(WE->getHeader()).
+    if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) {
       // If the smallest region containing MBB is a loop
       if (LoopMap.count(ML))
         return LoopMap[ML].get();
@@ -152,9 +158,17 @@ static void maybeUpdateTerminator(MachineBasicBlock *MBB) {
     AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
   }
   assert((AnyBarrier || AllAnalyzable) &&
-         "AnalyzeBranch needs to analyze any block with a fallthrough");
+         "analyzeBranch needs to analyze any block with a fallthrough");
+
+  // Find the layout successor from the original block order.
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *OriginalSuccessor =
+      unsigned(MBB->getNumber() + 1) < MF->getNumBlockIDs()
+          ? MF->getBlockNumbered(MBB->getNumber() + 1)
+          : nullptr;
+
   if (AllAnalyzable)
-    MBB->updateTerminator();
+    MBB->updateTerminator(OriginalSuccessor);
 }

 namespace {
@@ -241,9 +255,12 @@ struct Entry {
 static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
                        const WebAssemblyExceptionInfo &WEI,
                        const MachineDominatorTree &MDT) {
+  // Remember original layout ordering, so we can update terminators after
+  // reordering to point to the original layout successor.
+  MF.RenumberBlocks();
+
   // Prepare for a topological sort: Record the number of predecessors each
   // block has, ignoring loop backedges.
-  MF.RenumberBlocks();
   SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
   for (MachineBasicBlock &MBB : MF) {
     unsigned N = MBB.pred_size();
@@ -368,6 +385,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,

     const Region *Region = RI.getRegionFor(&MBB);
     if (Region && &MBB == Region->getHeader()) {
+      // Region header.
       if (Region->isLoop()) {
         // Loop header. The loop predecessor should be sorted above, and the
         // other predecessors should be backedges below.
@@ -377,7 +395,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
                  "Loop header predecessors must be loop predecessors or "
                  "backedges");
       } else {
-        // Not a loop header. All predecessors should be sorted above.
+        // Exception header. All predecessors should be sorted above.
         for (auto Pred : MBB.predecessors())
           assert(Pred->getNumber() < MBB.getNumber() &&
                  "Non-loop-header predecessors should be topologically sorted");
@@ -386,7 +404,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
              "Regions should be declared at most once.");

     } else {
-      // Not a loop header. All predecessors should be sorted above.
+      // Not a region header. All predecessors should be sorted above.
       for (auto Pred : MBB.predecessors())
         assert(Pred->getNumber() < MBB.getNumber() &&
                "Non-loop-header predecessors should be topologically sorted");
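sortBlocks is built around a topological sort: it counts each block's predecessors while ignoring loop backedges, then repeatedly emits blocks whose remaining count reaches zero, with the region (loop/exception) constraints layered on top. The bare Kahn's-algorithm core, without any region handling, looks roughly like this:

    #include <queue>
    #include <vector>

    // Topologically orders a DAG given as adjacency lists; in the real pass
    // loop backedges have already been excluded from the edge set.
    std::vector<int> topoSort(const std::vector<std::vector<int>> &Succs) {
      std::vector<int> NumPredsLeft(Succs.size(), 0), Order;
      for (const auto &S : Succs)
        for (int T : S)
          ++NumPredsLeft[T];
      std::queue<int> Ready;
      for (int B = 0; B < (int)Succs.size(); ++B)
        if (NumPredsLeft[B] == 0)
          Ready.push(B);
      while (!Ready.empty()) {
        int B = Ready.front();
        Ready.pop();
        Order.push_back(B);
        for (int T : Succs[B])
          if (--NumPredsLeft[T] == 0)
            Ready.push(T); // all predecessors placed, block is ready
      }
      return Order;
    }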
if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) || - (!Cond.empty() && FBB && FBB == Cont))) - TII.removeBranch(*EHPadLayoutPred); + (!Cond.empty() && FBB && FBB == Cont))) { + bool ErasedUncondBr = false; + (void)ErasedUncondBr; + for (auto I = EHPadLayoutPred->end(), E = EHPadLayoutPred->begin(); + I != E; --I) { + auto PrevI = std::prev(I); + if (PrevI->isTerminator()) { + assert(PrevI->getOpcode() == WebAssembly::BR); + PrevI->eraseFromParent(); + ErasedUncondBr = true; + break; + } + } + assert(ErasedUncondBr && "Unconditional branch not erased!"); + } } // When there are block / end_block markers that overlap with try / end_try @@ -705,12 +733,30 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { } } +// Get the appropriate copy opcode for the given register class. +static unsigned getCopyOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::COPY_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::COPY_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::COPY_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::COPY_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::COPY_V128; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::COPY_EXNREF; + llvm_unreachable("Unexpected register class"); +} + // When MBB is split into MBB and Split, we should unstackify defs in MBB that // have their uses in Split. static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, MachineBasicBlock &Split, WebAssemblyFunctionInfo &MFI, - MachineRegisterInfo &MRI) { + MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo &TII) { for (auto &MI : Split) { for (auto &MO : MI.explicit_uses()) { if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg())) @@ -720,6 +766,47 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, MFI.unstackifyVReg(MO.getReg()); } } + + // In RegStackify, when a register definition is used multiple times, + // Reg = INST ... + // INST ..., Reg, ... + // INST ..., Reg, ... + // INST ..., Reg, ... + // + // we introduce a TEE, which has the following form: + // DefReg = INST ... + // TeeReg, Reg = TEE_... DefReg + // INST ..., TeeReg, ... + // INST ..., Reg, ... + // INST ..., Reg, ... + // with DefReg and TeeReg stackified but Reg not stackified. + // + // But the invariant that TeeReg should be stackified can be violated while we + // unstackify registers in the split BB above. In this case, we convert TEEs + // into two COPYs. This COPY will be eventually eliminated in ExplicitLocals. + // DefReg = INST ... + // TeeReg = COPY DefReg + // Reg = COPY DefReg + // INST ..., TeeReg, ... + // INST ..., Reg, ... + // INST ..., Reg, ... 
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) { + MachineInstr &MI = *I++; + if (!WebAssembly::isTee(MI.getOpcode())) + continue; + Register TeeReg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(1).getReg(); + Register DefReg = MI.getOperand(2).getReg(); + if (!MFI.isVRegStackified(TeeReg)) { + // Now we are not using TEE anymore, so unstackify DefReg too + MFI.unstackifyVReg(DefReg); + unsigned CopyOpc = getCopyOpcode(MRI.getRegClass(DefReg)); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg) + .addReg(DefReg); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg); + MI.eraseFromParent(); + } + } } bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { @@ -866,6 +953,10 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // In new CFG, <destination to branch to, register containing exnref> DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg; + // Destinations for branches that will be newly added, for which new + // BLOCK/END_BLOCK markers are necessary. + SmallVector<MachineBasicBlock *, 8> BrDests; + // Gather possibly throwing calls (i.e., previously invokes) whose current // unwind destination is not the same as the original CFG. for (auto &MBB : reverse(MF)) { @@ -1036,7 +1127,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { BrDest->insert(BrDest->end(), EndTry->removeFromParent()); // Take out the handler body from EH pad to the new branch destination BB. BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end()); - unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI); + unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI, TII); // Fix predecessor-successor relationship. BrDest->transferSuccessors(EHPad); EHPad->addSuccessor(BrDest); @@ -1075,6 +1166,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { ? DebugLoc() : EHPadLayoutPred->rbegin()->getDebugLoc(); BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont); + BrDests.push_back(Cont); } } @@ -1109,6 +1201,9 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; std::tie(RangeBegin, RangeEnd) = Range; auto *MBB = RangeBegin->getParent(); + // Store the first function call from this range, because RangeBegin can + // be moved to point to an EH_LABEL before the call. + MachineInstr *RangeBeginCall = RangeBegin; // Include possible EH_LABELs in the range if (RangeBegin->getIterator() != MBB->begin() && @@ -1126,9 +1221,27 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { } } + // Local expression tree before the first call of this range should go + // after the nested TRY. + SmallPtrSet<const MachineInstr *, 4> AfterSet; + AfterSet.insert(RangeBegin); + AfterSet.insert(RangeBeginCall); + for (auto I = MachineBasicBlock::iterator(RangeBeginCall), + E = MBB->begin(); + I != E; --I) { + if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) + continue; + if (WebAssembly::isChild(*std::prev(I), MFI)) + AfterSet.insert(&*std::prev(I)); + else + break; + } + // Create the nested try instruction.
+ auto InsertPos = getLatestInsertPos( + MBB, SmallPtrSet<const MachineInstr *, 4>(), AfterSet); MachineInstr *NestedTry = - BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(), + BuildMI(*MBB, InsertPos, RangeBegin->getDebugLoc(), TII.get(WebAssembly::TRY)) .addImm(int64_t(WebAssembly::BlockType::Void)); @@ -1152,13 +1265,21 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // new nested continuation BB. NestedCont->splice(NestedCont->end(), MBB, std::next(RangeEnd->getIterator()), MBB->end()); - unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI); + unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI, TII); registerTryScope(NestedTry, NestedEndTry, NestedEHPad); // Fix predecessor-successor relationship. NestedCont->transferSuccessors(MBB); - if (EHPad) + if (EHPad) { NestedCont->removeSuccessor(EHPad); + // If EHPad does not have any predecessors left after removing + // the NestedCont predecessor, remove its successor too, because this EHPad + // is not reachable from the entry BB anyway. We can't remove EHPad BB + // itself because it can contain 'catch' or 'end', which are necessary + // for keeping try-catch-end structure. + if (EHPad->pred_empty()) + EHPad->removeSuccessor(BrDest); + } MBB->addSuccessor(NestedEHPad); MBB->addSuccessor(NestedCont); NestedEHPad->addSuccessor(BrDest); @@ -1190,10 +1311,14 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // Recompute the dominator tree. getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF); - // Place block markers for newly added branches. - SmallVector <MachineBasicBlock *, 8> BrDests; - for (auto &P : BrDestToTryRanges) - BrDests.push_back(P.first); + // Place block markers for newly added branches, if necessary. + + // If we've created an appendix BB and a branch to it, place a block/end_block + // marker for that. For some new branches, those branch destination BBs start + // with a hoisted end_try marker, so we don't need a new marker there. + if (AppendixBB) + BrDests.push_back(AppendixBB); + llvm::sort(BrDests, [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { auto ANum = A->getNumber(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp deleted file mode 100644 index 2537e6042b1e3..0000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file converts pseudo call_indirect instructions into real -/// call_indirects. -/// -/// The order of arguments for a call_indirect is the arguments to the function -/// call, followed by the function pointer. There's no natural way to express -/// a machineinstr with varargs followed by one more arg, so we express it as -/// the function pointer followed by varargs, then rewrite it here. -/// -/// We need to rewrite the order of the arguments on the machineinstrs -/// themselves so that register stackification knows the order they'll be -/// executed in.
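A minimal standalone illustration of the reordering this now-deleted pass performed, with toy operand strings instead of MachineOperands: the function pointer moves from the front of the operand list to the back, after the arguments (the two leading immediate placeholders are elided here).

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Pseudo form: fnptr first, then args. Executable form: args first,
  // fnptr last, matching wasm's evaluation order.
  std::vector<std::string> Ops = {"%fnptr", "%arg0", "%arg1"};
  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  for (const std::string &Op : Ops)
    std::cout << Op << ' '; // %arg0 %arg1 %fnptr
  std::cout << '\n';
}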
-/// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* -#include "WebAssembly.h" -#include "WebAssemblyMachineFunctionInfo.h" -#include "WebAssemblySubtarget.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "wasm-call-indirect-fixup" - -namespace { -class WebAssemblyCallIndirectFixup final : public MachineFunctionPass { - StringRef getPassName() const override { - return "WebAssembly CallIndirect Fixup"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - -public: - static char ID; // Pass identification, replacement for typeid - WebAssemblyCallIndirectFixup() : MachineFunctionPass(ID) {} -}; -} // end anonymous namespace - -char WebAssemblyCallIndirectFixup::ID = 0; -INITIALIZE_PASS(WebAssemblyCallIndirectFixup, DEBUG_TYPE, - "Rewrite call_indirect argument orderings", false, false) - -FunctionPass *llvm::createWebAssemblyCallIndirectFixup() { - return new WebAssemblyCallIndirectFixup(); -} - -static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) { - switch (MI.getOpcode()) { - using namespace WebAssembly; - case PCALL_INDIRECT_VOID: - return CALL_INDIRECT_VOID; - case PCALL_INDIRECT_i32: - return CALL_INDIRECT_i32; - case PCALL_INDIRECT_i64: - return CALL_INDIRECT_i64; - case PCALL_INDIRECT_f32: - return CALL_INDIRECT_f32; - case PCALL_INDIRECT_f64: - return CALL_INDIRECT_f64; - case PCALL_INDIRECT_v16i8: - return CALL_INDIRECT_v16i8; - case PCALL_INDIRECT_v8i16: - return CALL_INDIRECT_v8i16; - case PCALL_INDIRECT_v4i32: - return CALL_INDIRECT_v4i32; - case PCALL_INDIRECT_v2i64: - return CALL_INDIRECT_v2i64; - case PCALL_INDIRECT_v4f32: - return CALL_INDIRECT_v4f32; - case PCALL_INDIRECT_v2f64: - return CALL_INDIRECT_v2f64; - case PCALL_INDIRECT_exnref: - return CALL_INDIRECT_exnref; - case PRET_CALL_INDIRECT: - return RET_CALL_INDIRECT; - default: - return INSTRUCTION_LIST_END; - } -} - -static bool isPseudoCallIndirect(const MachineInstr &MI) { - return getNonPseudoCallIndirectOpcode(MI) != - WebAssembly::INSTRUCTION_LIST_END; -} - -bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n" - << "********** Function: " << MF.getName() << '\n'); - - bool Changed = false; - const WebAssemblyInstrInfo *TII = - MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (isPseudoCallIndirect(MI)) { - LLVM_DEBUG(dbgs() << "Found call_indirect: " << MI << '\n'); - - // Rewrite pseudo to non-pseudo - const MCInstrDesc &Desc = TII->get(getNonPseudoCallIndirectOpcode(MI)); - MI.setDesc(Desc); - - // Rewrite argument order - SmallVector<MachineOperand, 8> Ops; - - // Set up a placeholder for the type signature immediate. - Ops.push_back(MachineOperand::CreateImm(0)); - - // Set up the flags immediate, which currently has no defined flags - // so it's always zero. 
- Ops.push_back(MachineOperand::CreateImm(0)); - - for (const MachineOperand &MO : - make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1, - MI.operands_begin() + MI.getNumExplicitOperands())) - Ops.push_back(MO); - Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs())); - - // Replace the instruction's operands. - while (MI.getNumOperands() > MI.getDesc().getNumDefs()) - MI.RemoveOperand(MI.getNumOperands() - 1); - for (const MachineOperand &MO : Ops) - MI.addOperand(MO); - - LLVM_DEBUG(dbgs() << " After transform: " << MI); - Changed = true; - } - } - } - - LLVM_DEBUG(dbgs() << "\nDone fixing up CALL_INDIRECTs\n\n"); - - return Changed; -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp new file mode 100644 index 0000000000000..655e30a29eff4 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp @@ -0,0 +1,138 @@ +//===-- WebAssemblyDebugFixup.cpp - Debug Fixup ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Several prior passes may "stackify" registers; here we ensure that any +/// references to such registers in debug_value instructions become stack +/// relative as well. +/// This is done in a separate pass such that not all previous passes need to +/// track stack depth when values get stackified. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-debug-fixup" + +namespace { +class WebAssemblyDebugFixup final : public MachineFunctionPass { + StringRef getPassName() const override { return "WebAssembly Debug Fixup"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyDebugFixup() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyDebugFixup::ID = 0; +INITIALIZE_PASS( + WebAssemblyDebugFixup, DEBUG_TYPE, + "Ensures debug_value's that have been stackified become stack relative", + false, false) + +FunctionPass *llvm::createWebAssemblyDebugFixup() { + return new WebAssemblyDebugFixup(); +} + +bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Debug Fixup **********\n" + "********** Function: " + << MF.getName() << '\n'); + + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + struct StackElem { + unsigned Reg; + MachineInstr *DebugValue; + }; + std::vector<StackElem>
Stack; + for (MachineBasicBlock &MBB : MF) { + // We may insert into this list. + for (auto MII = MBB.begin(); MII != MBB.end(); ++MII) { + MachineInstr &MI = *MII; + if (MI.isDebugValue()) { + auto &MO = MI.getOperand(0); + // Also check if not a $noreg: likely a DBG_VALUE we just inserted. + if (MO.isReg() && MO.getReg().isValid() && + MFI.isVRegStackified(MO.getReg())) { + // Found a DBG_VALUE with a stackified register we will + // change into a stack operand. + // Search for register rather than assume it is on top (which it + // typically is if it appears right after the def), since + // DBG_VALUE's may shift under some circumstances. + for (auto &Elem : reverse(Stack)) { + if (MO.getReg() == Elem.Reg) { + auto Depth = static_cast<unsigned>(&Elem - &Stack[0]); + LLVM_DEBUG(dbgs() << "Debug Value VReg " << MO.getReg() + << " -> Stack Relative " << Depth << "\n"); + MO.ChangeToTargetIndex(WebAssembly::TI_OPERAND_STACK, Depth); + // Save the DBG_VALUE instruction that defined this stackified + // variable since later we need it to construct another one on + // pop. + Elem.DebugValue = &MI; + break; + } + } + // If the Reg was not found, we have a DBG_VALUE outside of its + // def-use range, and we leave it unmodified as reg, which means + // it will be culled later. + } + } else { + // Track stack depth. + for (MachineOperand &MO : reverse(MI.explicit_uses())) { + if (MO.isReg() && MFI.isVRegStackified(MO.getReg())) { + auto Prev = Stack.back(); + Stack.pop_back(); + assert(Prev.Reg == MO.getReg() && + "WebAssemblyDebugFixup: Pop: Register not matched!"); + if (Prev.DebugValue) { + // This stackified reg is a variable that started life at + // Prev.DebugValue, so now that we're popping it we must insert + // a $noreg DBG_VALUE for the variable to end it, right after + // the current instruction. 
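A self-contained simulation of the bookkeeping just described, with strings standing in for virtual registers: stackified defs push, uses pop in reverse operand order, and a DBG_VALUE is resolved to its depth measured from the bottom of the stack (the same quantity the pass stores in TI_OPERAND_STACK).

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Stack;
  auto Push = [&](const std::string &R) { Stack.push_back(R); };
  auto Pop = [&](const std::string &R) {
    assert(!Stack.empty() && Stack.back() == R && "pop mismatch");
    Stack.pop_back();
  };
  Push("%a"); // %a = i32.const 1
  Push("%b"); // %b = i32.const 2
  // DBG_VALUE %a: search from the top; the index from the bottom is the
  // stack-relative location.
  for (size_t I = Stack.size(); I-- > 0;)
    if (Stack[I] == "%a") {
      std::cout << "DBG_VALUE %a -> stack depth " << I << '\n'; // 0
      break;
    }
  Pop("%b"); // i32.add pops %b, then %a
  Pop("%a");
  assert(Stack.empty() && "stack must be empty at block end");
}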
+ BuildMI(*Prev.DebugValue->getParent(), std::next(MII), + Prev.DebugValue->getDebugLoc(), TII->get(WebAssembly::DBG_VALUE), false, + Register(), Prev.DebugValue->getOperand(2).getMetadata(), + Prev.DebugValue->getOperand(3).getMetadata()); + } + } + } + for (MachineOperand &MO : MI.defs()) { + if (MO.isReg() && MFI.isVRegStackified(MO.getReg())) { + Stack.push_back({MO.getReg(), nullptr}); + } + } + } + } + assert(Stack.empty() && + "WebAssemblyDebugFixup: Stack not empty at end of basic block!"); + } + + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp index 114a50a3055d7..159fb4c00ddc7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp @@ -31,7 +31,7 @@ void WebAssemblyDebugValueManager::move(MachineInstr *Insert) { void WebAssemblyDebugValueManager::updateReg(unsigned Reg) { for (auto *DBI : DbgValues) - DBI->getOperand(0).setReg(Reg); + DBI->getDebugOperand(0).setReg(Reg); } void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, @@ -40,14 +40,14 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, MachineFunction *MF = MBB->getParent(); for (MachineInstr *DBI : reverse(DbgValues)) { MachineInstr *Clone = MF->CloneMachineInstr(DBI); - Clone->getOperand(0).setReg(NewReg); + Clone->getDebugOperand(0).setReg(NewReg); MBB->insert(Insert, Clone); } } void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) { for (auto *DBI : DbgValues) { - MachineOperand &Op = DBI->getOperand(0); - Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL_START, LocalId); + MachineOperand &Op = DBI->getDebugOperand(0); + Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL, LocalId); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp index a511b320b56b1..c75de7aa207f3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp @@ -46,14 +46,14 @@ bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) { void WebAssemblyExceptionInfo::recalculate( MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF) { // Postorder traversal of the dominator tree. - SmallVector<WebAssemblyException *, 8> Exceptions; + SmallVector<std::unique_ptr<WebAssemblyException>, 8> Exceptions; for (auto DomNode : post_order(&MDT)) { MachineBasicBlock *EHPad = DomNode->getBlock(); if (!EHPad->isEHPad()) continue; - auto *WE = new WebAssemblyException(EHPad); - discoverAndMapException(WE, MDT, MDF); - Exceptions.push_back(WE); + auto WE = std::make_unique<WebAssemblyException>(EHPad); + discoverAndMapException(WE.get(), MDT, MDF); + Exceptions.push_back(std::move(WE)); } // Add BBs to exceptions @@ -64,17 +64,21 @@ void WebAssemblyExceptionInfo::recalculate( WE->addBlock(MBB); } + SmallVector<WebAssemblyException*, 8> ExceptionPointers; + ExceptionPointers.reserve(Exceptions.size()); + // Add subexceptions to exceptions - for (auto *WE : Exceptions) { + for (auto &WE : Exceptions) { + ExceptionPointers.push_back(WE.get()); if (WE->getParentException()) - WE->getParentException()->getSubExceptions().push_back(WE); + WE->getParentException()->getSubExceptions().push_back(std::move(WE)); else - addTopLevelException(WE); + addTopLevelException(std::move(WE)); } // For convenience, Blocks and SubExceptions are inserted in postorder. 
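An aside on the ownership scheme this hunk adopts: build every node in one flat vector of unique_ptr, then move each into its parent (or the top level), keeping raw pointers only as non-owning handles. A generic standalone sketch under those assumptions, not the WebAssemblyException types themselves:

#include <iostream>
#include <memory>
#include <vector>

struct Node {
  Node *Parent = nullptr;                      // non-owning back edge
  std::vector<std::unique_ptr<Node>> Children; // owning edges
};

int main() {
  std::vector<std::unique_ptr<Node>> Flat; // like the postorder list
  Flat.push_back(std::make_unique<Node>());
  Flat.push_back(std::make_unique<Node>());
  Flat[1]->Parent = Flat[0].get();

  std::vector<Node *> Handles; // like ExceptionPointers
  std::vector<std::unique_ptr<Node>> TopLevel;
  for (auto &N : Flat) {
    Handles.push_back(N.get());
    if (N->Parent) // moving the unique_ptr never moves the node itself,
      N->Parent->Children.push_back(std::move(N)); // so Parent stays valid
    else
      TopLevel.push_back(std::move(N));
  }
  std::cout << TopLevel.size() << '\n'; // 1: the child lives in its parent
}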
// Reverse the lists. - for (auto *WE : Exceptions) { + for (auto *WE : ExceptionPointers) { WE->reverseBlock(); std::reverse(WE->getSubExceptions().begin(), WE->getSubExceptions().end()); } @@ -82,7 +86,6 @@ void WebAssemblyExceptionInfo::recalculate( void WebAssemblyExceptionInfo::releaseMemory() { BBMap.clear(); - DeleteContainerPointers(TopLevelExceptions); TopLevelExceptions.clear(); } @@ -181,6 +184,6 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE) { } void WebAssemblyExceptionInfo::print(raw_ostream &OS, const Module *) const { - for (auto *WE : TopLevelExceptions) + for (auto &WE : TopLevelExceptions) WE->print(OS); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 9a90d7df7d47d..50151ec8da5aa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -43,13 +43,12 @@ class WebAssemblyException { MachineBasicBlock *EHPad = nullptr; WebAssemblyException *ParentException = nullptr; - std::vector<WebAssemblyException *> SubExceptions; + std::vector<std::unique_ptr<WebAssemblyException>> SubExceptions; std::vector<MachineBasicBlock *> Blocks; SmallPtrSet<const MachineBasicBlock *, 8> BlockSet; public: WebAssemblyException(MachineBasicBlock *EHPad) : EHPad(EHPad) {} - ~WebAssemblyException() { DeleteContainerPointers(SubExceptions); } WebAssemblyException(const WebAssemblyException &) = delete; const WebAssemblyException &operator=(const WebAssemblyException &) = delete; @@ -83,14 +82,16 @@ public: unsigned getNumBlocks() const { return Blocks.size(); } std::vector<MachineBasicBlock *> &getBlocksVector() { return Blocks; } - const std::vector<WebAssemblyException *> &getSubExceptions() const { + const std::vector<std::unique_ptr<WebAssemblyException>> &getSubExceptions() const { return SubExceptions; } - std::vector<WebAssemblyException *> &getSubExceptions() { + std::vector<std::unique_ptr<WebAssemblyException>> &getSubExceptions() { return SubExceptions; } - void addSubException(WebAssemblyException *E) { SubExceptions.push_back(E); } - using iterator = typename std::vector<WebAssemblyException *>::const_iterator; + void addSubException(std::unique_ptr<WebAssemblyException> E) { + SubExceptions.push_back(std::move(E)); + } + using iterator = typename decltype(SubExceptions)::const_iterator; iterator begin() const { return SubExceptions.begin(); } iterator end() const { return SubExceptions.end(); } @@ -117,7 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE); class WebAssemblyExceptionInfo final : public MachineFunctionPass { // Mapping of basic blocks to the innermost exception they occur in DenseMap<const MachineBasicBlock *, WebAssemblyException *> BBMap; - std::vector<WebAssemblyException *> TopLevelExceptions; + std::vector<std::unique_ptr<WebAssemblyException>> TopLevelExceptions; void discoverAndMapException(WebAssemblyException *WE, const MachineDominatorTree &MDT, @@ -156,9 +157,9 @@ public: BBMap[MBB] = WE; } - void addTopLevelException(WebAssemblyException *WE) { + void addTopLevelException(std::unique_ptr<WebAssemblyException> WE) { assert(!WE->getParentException() && "Not a top level exception!"); - TopLevelExceptions.push_back(WE); + TopLevelExceptions.push_back(std::move(WE)); } void print(raw_ostream &OS, const Module *M = nullptr) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index acbd4c9921b0e..55925bcbe7711 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -31,16 +31,6 @@ using namespace llvm; #define DEBUG_TYPE "wasm-explicit-locals" -// A command-line option to disable this pass, and keep implicit locals -// for the purpose of testing with lit/llc ONLY. -// This produces output which is not valid WebAssembly, and is not supported -// by assemblers/disassemblers and other MC based tools. -static cl::opt<bool> WasmDisableExplicitLocals( - "wasm-disable-explicit-locals", cl::Hidden, - cl::desc("WebAssembly: output implicit locals in" - " instruction output for test purposes only."), - cl::init(false)); - namespace { class WebAssemblyExplicitLocals final : public MachineFunctionPass { StringRef getPassName() const override { @@ -69,13 +59,28 @@ FunctionPass *llvm::createWebAssemblyExplicitLocals() { return new WebAssemblyExplicitLocals(); } +static void checkFrameBase(WebAssemblyFunctionInfo &MFI, unsigned Local, + unsigned Reg) { + // Mark a local for the frame base vreg. + if (MFI.isFrameBaseVirtual() && Reg == MFI.getFrameBaseVreg()) { + LLVM_DEBUG({ + dbgs() << "Allocating local " << Local << " for VReg " + << Register::virtReg2Index(Reg) << '\n'; + }); + MFI.setFrameBaseLocal(Local); + } +} + /// Return a local id number for the given register, assigning it a new one /// if it doesn't yet have one. static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local, - unsigned &CurLocal, unsigned Reg) { + WebAssemblyFunctionInfo &MFI, unsigned &CurLocal, + unsigned Reg) { auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal)); - if (P.second) + if (P.second) { + checkFrameBase(MFI, CurLocal, Reg); ++CurLocal; + } return P.first->second; } @@ -168,11 +173,18 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { /// start of the expression tree. static MachineInstr *findStartOfTree(MachineOperand &MO, MachineRegisterInfo &MRI, - WebAssemblyFunctionInfo &MFI) { + const WebAssemblyFunctionInfo &MFI) { Register Reg = MO.getReg(); assert(MFI.isVRegStackified(Reg)); MachineInstr *Def = MRI.getVRegDef(Reg); + // If this instruction has any non-stackified defs, it is the start. + for (auto DefReg : Def->defs()) { + if (!MFI.isVRegStackified(DefReg.getReg())) { + return Def; + } + } + // Find the first stackified use and proceed from there. for (MachineOperand &DefMO : Def->explicit_uses()) { if (!DefMO.isReg()) @@ -189,10 +201,6 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { "********** Function: " << MF.getName() << '\n'); - // Disable this pass if directed to do so.
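The map idiom in getLocalId above, shown in isolation with std::unordered_map standing in for llvm::DenseMap (both share the insert contract): a single lookup both reserves the next id and returns the remembered one.

#include <iostream>
#include <unordered_map>

static unsigned getLocalId(std::unordered_map<unsigned, unsigned> &Reg2Local,
                           unsigned &CurLocal, unsigned Reg) {
  // insert() returns {iterator, inserted}; on first sight the register
  // takes CurLocal, and repeat queries return the stored id.
  auto P = Reg2Local.insert({Reg, CurLocal});
  if (P.second)
    ++CurLocal;
  return P.first->second;
}

int main() {
  std::unordered_map<unsigned, unsigned> Reg2Local;
  unsigned CurLocal = 2; // locals 0..1 already taken by arguments
  std::cout << getLocalId(Reg2Local, CurLocal, 100) << '\n'; // 2
  std::cout << getLocalId(Reg2Local, CurLocal, 101) << '\n'; // 3
  std::cout << getLocalId(Reg2Local, CurLocal, 100) << '\n'; // 2
}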
- if (WasmDisableExplicitLocals) - return false; - bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); @@ -210,7 +218,9 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { break; Register Reg = MI.getOperand(0).getReg(); assert(!MFI.isVRegStackified(Reg)); - Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm()); + auto Local = static_cast<unsigned>(MI.getOperand(1).getImm()); + Reg2Local[Reg] = Local; + checkFrameBase(MFI, Local, Reg); MI.eraseFromParent(); Changed = true; } @@ -233,6 +243,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { if (MI.isDebugInstr() || MI.isLabel()) continue; + if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) { + MI.eraseFromParent(); + Changed = true; + continue; + } + // Replace tee instructions with local.tee. The difference is that tee // instructions have two defs, while local.tee instructions have one def // and an index of a local to write to. @@ -244,18 +260,18 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // Stackify the input if it isn't stackified yet. if (!MFI.isVRegStackified(OldReg)) { - unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg); Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg) .addImm(LocalId); MI.getOperand(2).setReg(NewReg); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); } // Replace the TEE with a LOCAL_TEE. unsigned LocalId = - getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg()); + getLocalId(Reg2Local, MFI, CurLocal, MI.getOperand(1).getReg()); unsigned Opc = getLocalTeeOpcode(RC); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(0).getReg()) @@ -269,20 +285,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { continue; } - // Insert local.sets for any defs that aren't stackified yet. Currently - // we handle at most one def. - assert(MI.getDesc().getNumDefs() <= 1); - if (MI.getDesc().getNumDefs() == 1) { - Register OldReg = MI.getOperand(0).getReg(); + // Insert local.sets for any defs that aren't stackified yet. 
+ for (auto &Def : MI.defs()) { + Register OldReg = Def.getReg(); if (!MFI.isVRegStackified(OldReg)) { const TargetRegisterClass *RC = MRI.getRegClass(OldReg); Register NewReg = MRI.createVirtualRegister(RC); auto InsertPt = std::next(MI.getIterator()); - if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) { - MI.eraseFromParent(); - Changed = true; - continue; - } if (UseEmpty[Register::virtReg2Index(OldReg)]) { unsigned Opc = getDropOpcode(RC); MachineInstr *Drop = @@ -290,8 +299,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { .addReg(NewReg); // After the drop instruction, this reg operand will not be used Drop->getOperand(0).setIsKill(); + if (MFI.isFrameBaseVirtual() && OldReg == MFI.getFrameBaseVreg()) + MFI.clearFrameBaseVreg(); } else { - unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg); unsigned Opc = getLocalSetOpcode(RC); WebAssemblyDebugValueManager(&MI).replaceWithLocal(LocalId); @@ -300,12 +311,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { .addImm(LocalId) .addReg(NewReg); } - MI.getOperand(0).setReg(NewReg); // This register operand of the original instruction is now being used // by the inserted drop or local.set instruction, so make it not dead // yet. - MI.getOperand(0).setIsDead(false); - MFI.stackifyVReg(NewReg); + Def.setReg(NewReg); + Def.setIsDead(false); + MFI.stackifyVReg(MRI, NewReg); Changed = true; } } @@ -323,7 +334,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // immediates. if (MO.isDef()) { assert(MI.isInlineAsm()); - unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg); // If this register operand is tied to another operand, we can't // change it to an immediate. Untie it first. MI.untieRegOperand(MI.getOperandNo(&MO)); @@ -341,7 +352,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // Our contract with inline asm register operands is to provide local // indices as immediates. if (MI.isInlineAsm()) { - unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg); // Untie it first if this reg operand is tied to another operand. MI.untieRegOperand(MI.getOperandNo(&MO)); MO.ChangeToImmediate(LocalId); @@ -349,7 +360,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { } // Insert a local.get. 
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); @@ -357,7 +368,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) .addImm(LocalId); MO.setReg(NewReg); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); Changed = true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index c932f985489ab..8a0092a3f2983 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -640,6 +640,9 @@ bool WebAssemblyFastISel::fastLowerArguments() { if (F->isVarArg()) return false; + if (FuncInfo.Fn->getCallingConv() == CallingConv::Swift) + return false; + unsigned I = 0; for (auto const &Arg : F->args()) { const AttributeList &Attrs = F->getAttributes(); @@ -754,17 +757,18 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (Func && Func->isIntrinsic()) return false; + if (Call->getCallingConv() == CallingConv::Swift) + return false; + bool IsDirect = Func != nullptr; - if (!IsDirect && isa<ConstantExpr>(Call->getCalledValue())) + if (!IsDirect && isa<ConstantExpr>(Call->getCalledOperand())) return false; FunctionType *FuncTy = Call->getFunctionType(); - unsigned Opc; + unsigned Opc = IsDirect ? WebAssembly::CALL : WebAssembly::CALL_INDIRECT; bool IsVoid = FuncTy->getReturnType()->isVoidTy(); unsigned ResultReg; - if (IsVoid) { - Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::PCALL_INDIRECT_VOID; - } else { + if (!IsVoid) { if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy()) return false; @@ -774,54 +778,36 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { case MVT::i8: case MVT::i16: case MVT::i32: - Opc = IsDirect ? WebAssembly::CALL_i32 : WebAssembly::PCALL_INDIRECT_i32; ResultReg = createResultReg(&WebAssembly::I32RegClass); break; case MVT::i64: - Opc = IsDirect ? WebAssembly::CALL_i64 : WebAssembly::PCALL_INDIRECT_i64; ResultReg = createResultReg(&WebAssembly::I64RegClass); break; case MVT::f32: - Opc = IsDirect ? WebAssembly::CALL_f32 : WebAssembly::PCALL_INDIRECT_f32; ResultReg = createResultReg(&WebAssembly::F32RegClass); break; case MVT::f64: - Opc = IsDirect ? WebAssembly::CALL_f64 : WebAssembly::PCALL_INDIRECT_f64; ResultReg = createResultReg(&WebAssembly::F64RegClass); break; case MVT::v16i8: - Opc = IsDirect ? WebAssembly::CALL_v16i8 - : WebAssembly::PCALL_INDIRECT_v16i8; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::v8i16: - Opc = IsDirect ? WebAssembly::CALL_v8i16 - : WebAssembly::PCALL_INDIRECT_v8i16; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::v4i32: - Opc = IsDirect ? WebAssembly::CALL_v4i32 - : WebAssembly::PCALL_INDIRECT_v4i32; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::v2i64: - Opc = IsDirect ? WebAssembly::CALL_v2i64 - : WebAssembly::PCALL_INDIRECT_v2i64; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::v4f32: - Opc = IsDirect ? WebAssembly::CALL_v4f32 - : WebAssembly::PCALL_INDIRECT_v4f32; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::v2f64: - Opc = IsDirect ? 
WebAssembly::CALL_v2f64 - : WebAssembly::PCALL_INDIRECT_v2f64; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; case MVT::exnref: - Opc = IsDirect ? WebAssembly::CALL_exnref - : WebAssembly::PCALL_INDIRECT_exnref; ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); break; default: @@ -861,7 +847,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { unsigned CalleeReg = 0; if (!IsDirect) { - CalleeReg = getRegForValue(Call->getCalledValue()); + CalleeReg = getRegForValue(Call->getCalledOperand()); if (!CalleeReg) return false; } @@ -871,14 +857,20 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (!IsVoid) MIB.addReg(ResultReg, RegState::Define); - if (IsDirect) + if (IsDirect) { MIB.addGlobalAddress(Func); - else - MIB.addReg(CalleeReg); + } else { + // Add placeholders for the type index and immediate flags + MIB.addImm(0); + MIB.addImm(0); + } for (unsigned ArgReg : Args) MIB.addReg(ArgReg); + if (!IsDirect) + MIB.addReg(CalleeReg); + if (!IsVoid) updateValueMap(Call, ResultReg); return true; @@ -1168,30 +1160,31 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) { unsigned Opc; const TargetRegisterClass *RC; + bool A64 = Subtarget->hasAddr64(); switch (getSimpleType(Load->getType())) { case MVT::i1: case MVT::i8: - Opc = WebAssembly::LOAD8_U_I32; + Opc = A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32; RC = &WebAssembly::I32RegClass; break; case MVT::i16: - Opc = WebAssembly::LOAD16_U_I32; + Opc = A64 ? WebAssembly::LOAD16_U_I32_A64 : WebAssembly::LOAD16_U_I32_A32; RC = &WebAssembly::I32RegClass; break; case MVT::i32: - Opc = WebAssembly::LOAD_I32; + Opc = A64 ? WebAssembly::LOAD_I32_A64 : WebAssembly::LOAD_I32_A32; RC = &WebAssembly::I32RegClass; break; case MVT::i64: - Opc = WebAssembly::LOAD_I64; + Opc = A64 ? WebAssembly::LOAD_I64_A64 : WebAssembly::LOAD_I64_A32; RC = &WebAssembly::I64RegClass; break; case MVT::f32: - Opc = WebAssembly::LOAD_F32; + Opc = A64 ? WebAssembly::LOAD_F32_A64 : WebAssembly::LOAD_F32_A32; RC = &WebAssembly::F32RegClass; break; case MVT::f64: - Opc = WebAssembly::LOAD_F64; + Opc = A64 ? WebAssembly::LOAD_F64_A64 : WebAssembly::LOAD_F64_A32; RC = &WebAssembly::F64RegClass; break; default: @@ -1224,27 +1217,28 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) { unsigned Opc; bool VTIsi1 = false; + bool A64 = Subtarget->hasAddr64(); switch (getSimpleType(Store->getValueOperand()->getType())) { case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; case MVT::i8: - Opc = WebAssembly::STORE8_I32; + Opc = A64 ? WebAssembly::STORE8_I32_A64 : WebAssembly::STORE8_I32_A32; break; case MVT::i16: - Opc = WebAssembly::STORE16_I32; + Opc = A64 ? WebAssembly::STORE16_I32_A64 : WebAssembly::STORE16_I32_A32; break; case MVT::i32: - Opc = WebAssembly::STORE_I32; + Opc = A64 ? WebAssembly::STORE_I32_A64 : WebAssembly::STORE_I32_A32; break; case MVT::i64: - Opc = WebAssembly::STORE_I64; + Opc = A64 ? WebAssembly::STORE_I64_A64 : WebAssembly::STORE_I64_A32; break; case MVT::f32: - Opc = WebAssembly::STORE_F32; + Opc = A64 ? WebAssembly::STORE_F32_A64 : WebAssembly::STORE_F32_A32; break; case MVT::f64: - Opc = WebAssembly::STORE_F64; + Opc = A64 ? 
WebAssembly::STORE_F64_A64 : WebAssembly::STORE_F64_A32; break; default: return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp new file mode 100644 index 0000000000000..7f805b34b4995 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp @@ -0,0 +1,155 @@ +//=- WebAssemblyFixBrTableDefaults.cpp - Fix br_table default branch targets -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file implements a pass that eliminates redundant range checks +/// guarding br_table instructions. Since jump tables on most targets cannot +/// handle out of range indices, LLVM emits these checks before most jump +/// tables. But br_table takes a default branch target as an argument, so it +/// does not need the range checks. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-fix-br-table-defaults" + +namespace { + +class WebAssemblyFixBrTableDefaults final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Fix br_table Defaults"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyFixBrTableDefaults() : MachineFunctionPass(ID) {} +}; + +char WebAssemblyFixBrTableDefaults::ID = 0; + +// `MI` is a br_table instruction with a dummy default target argument. This +// function finds and adds the default target argument and removes any redundant +// range check preceding the br_table. Returns the MBB that the br_table is +// moved into so it can be removed from further consideration, or nullptr if the +// br_table cannot be optimized. +MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { + // Get the header block, which contains the redundant range check. + assert(MBB->pred_size() == 1 && "Expected a single guard predecessor"); + auto *HeaderMBB = *MBB->pred_begin(); + + // Find the conditional jump to the default target. If it doesn't exist, the + // default target is unreachable anyway, so we can keep the existing dummy + // target. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 2> Cond; + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + bool Analyzed = !TII.analyzeBranch(*HeaderMBB, TBB, FBB, Cond); + assert(Analyzed && "Could not analyze jump header branches"); + (void)Analyzed; + + // Here are the possible outcomes. '_' is nullptr, `J` is the jump table block + // aka MBB, 'D' is the default block. 
+ // + // TBB | FBB | Meaning + // _ | _ | No default block, header falls through to jump table + // J | _ | No default block, header jumps to the jump table + // D | _ | Header jumps to the default and falls through to the jump table + // D | J | Header jumps to the default and also to the jump table + if (TBB && TBB != MBB) { + assert((FBB == nullptr || FBB == MBB) && + "Expected jump or fallthrough to br_table block"); + assert(Cond.size() == 2 && Cond[1].isReg() && "Unexpected condition info"); + + // If the range check checks an i64 value, we cannot optimize it out because + // the i64 index is truncated to an i32, making values over 2^32 + // indistinguishable from small numbers. There are also other strange edge + // cases that can arise in practice that we don't want to reason about, so + // conservatively only perform the optimization if the range check is the + // normal case of an i32.gt_u. + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto *RangeCheck = MRI.getVRegDef(Cond[1].getReg()); + assert(RangeCheck != nullptr); + if (RangeCheck->getOpcode() != WebAssembly::GT_U_I32) + return nullptr; + + // Remove the dummy default target and install the real one. + MI.RemoveOperand(MI.getNumExplicitOperands() - 1); + MI.addOperand(MF, MachineOperand::CreateMBB(TBB)); + } + + // Remove any branches from the header and splice in the jump table instead + TII.removeBranch(*HeaderMBB, nullptr); + HeaderMBB->splice(HeaderMBB->end(), MBB, MBB->begin(), MBB->end()); + + // Update CFG to skip the old jump table block. Remove shared successors + // before transferring to avoid duplicated successors. + HeaderMBB->removeSuccessor(MBB); + for (auto &Succ : MBB->successors()) + if (HeaderMBB->isSuccessor(Succ)) + HeaderMBB->removeSuccessor(Succ); + HeaderMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Remove the old jump table block from the function + MF.erase(MBB); + + return HeaderMBB; +} + +bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Fixing br_table Default Targets **********\n" + "********** Function: " + << MF.getName() << '\n'); + + bool Changed = false; + SmallPtrSet<MachineBasicBlock *, 16> MBBSet; + for (auto &MBB : MF) + MBBSet.insert(&MBB); + + while (!MBBSet.empty()) { + MachineBasicBlock *MBB = *MBBSet.begin(); + MBBSet.erase(MBB); + for (auto &MI : *MBB) { + if (WebAssembly::isBrTable(MI)) { + auto *Fixed = fixBrTable(MI, MBB, MF); + if (Fixed != nullptr) { + MBBSet.erase(Fixed); + Changed = true; + } + break; + } + } + } + + if (Changed) { + // We rewrote part of the function; recompute relevant things. 
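The driver above is a worklist over blocks in which the transform itself may retire extra items (fixBrTable hands back the header it merged into). A generic standalone rendering of that pattern, before the renumbering below:

#include <iostream>
#include <set>

int main() {
  std::set<int> Worklist = {1, 2, 3};
  while (!Worklist.empty()) {
    int Item = *Worklist.begin();
    Worklist.erase(Worklist.begin());
    std::cout << "process " << Item << '\n';
    if (Item == 1)       // processing 1 also consumed 2, so drop it
      Worklist.erase(2); // from the worklist before it is visited
  }
}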
+ MF.RenumberBlocks(); + return true; + } + + return false; +} + +} // end anonymous namespace + +INITIALIZE_PASS(WebAssemblyFixBrTableDefaults, DEBUG_TYPE, + "Removes range checks and sets br_table default targets", false, + false) + +FunctionPass *llvm::createWebAssemblyFixBrTableDefaults() { + return new WebAssemblyFixBrTableDefaults(); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 6b1bbd7a2b079..7abb6fa8905cc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -23,7 +23,6 @@ //===----------------------------------------------------------------------===// #include "WebAssembly.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -73,11 +72,11 @@ static void findUses(Value *V, Function &F, else if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) findUses(A, F, Uses, ConstantBCs); else if (U.get()->getType() != F.getType()) { - CallSite CS(U.getUser()); - if (!CS) + CallBase *CB = dyn_cast<CallBase>(U.getUser()); + if (!CB) // Skip uses that aren't immediately called continue; - Value *Callee = CS.getCalledValue(); + Value *Callee = CB->getCalledOperand(); if (Callee != V) // Skip calls where the function isn't the callee continue; @@ -244,6 +243,10 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { // Collect all the places that need wrappers. for (Function &F : M) { + // Skip fixing when the function is swiftcc, because swiftcc allows + // bitcast type differences for swiftself and swifterror. + if (F.getCallingConv() == CallingConv::Swift) + continue; findUses(&F, F, Uses, ConstantBCs); // If we have a "main" function, and its type isn't @@ -304,7 +307,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { if (CallMain) { Main->setName("__original_main"); auto *MainWrapper = - cast<Function>(CallMain->getCalledValue()->stripPointerCasts()); + cast<Function>(CallMain->getCalledOperand()->stripPointerCasts()); delete CallMain; if (Main->isDeclaration()) { // The wrapper is not needed in this case as we don't need to export diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 157ea9d525c96..1ceae59dc9939 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -66,6 +66,17 @@ namespace { using BlockVector = SmallVector<MachineBasicBlock *, 4>; using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; +static BlockVector getSortedEntries(const BlockSet &Entries) { + BlockVector SortedEntries(Entries.begin(), Entries.end()); + llvm::sort(SortedEntries, + [](const MachineBasicBlock *A, const MachineBasicBlock *B) { + auto ANum = A->getNumber(); + auto BNum = B->getNumber(); + return ANum < BNum; + }); + return SortedEntries; +} + // Calculates reachability in a region. Ignores branches to blocks outside of // the region, and ignores branches to the region entry (for the case where // the region is the inner part of a loop). @@ -241,7 +252,6 @@ public: bool WebAssemblyFixIrreducibleControlFlow::processRegion( MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) { bool Changed = false; - // Remove irreducibility before processing child loops, which may take // multiple iterations.
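The comment above describes a run-to-fixed-point driver: re-analyze from scratch, transform once, and exit only after a clean pass finds nothing. Schematically, with a toy integer standing in for the region's CFG:

#include <iostream>

int main() {
  int State = 37; // toy stand-in for the region being simplified
  bool Changed = false;
  while (true) {
    bool FoundWork = State > 1; // rerun the analysis on the current state
    if (!FoundWork)
      break;                    // clean pass: fixed point reached
    State /= 2;                 // one transform invalidates the analysis
    Changed = true;
  }
  std::cout << State << ' ' << std::boolalpha << Changed << '\n'; // 1 true
}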
while (true) { @@ -249,12 +259,18 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion( bool FoundIrreducibility = false; - for (auto *LoopEntry : Graph.getLoopEntries()) { + for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) { // Find mutual entries - all entries which can reach this one, and // are reached by it (that always includes LoopEntry itself). All mutual // entries must be in the same loop, so if we have more than one, then we // have irreducible control flow. // + // (Note that we need to sort the entries here, as otherwise the order can + // matter: being mutual is a symmetric relationship, and each set of + // mutuals will be handled properly no matter which we see first. However, + // there can be multiple disjoint sets of mutuals, and which we process + // first changes the output.) + // // Note that irreducibility may involve inner loops, e.g. imagine A // starts one loop, and it has B inside it which starts an inner loop. // If we add a branch from all the way on the outside to B, then in a @@ -325,13 +341,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( assert(Entries.size() >= 2); // Sort the entries to ensure a deterministic build. - BlockVector SortedEntries(Entries.begin(), Entries.end()); - llvm::sort(SortedEntries, - [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { - auto ANum = A->getNumber(); - auto BNum = B->getNumber(); - return ANum < BNum; - }); + BlockVector SortedEntries = getSortedEntries(Entries); #ifndef NDEBUG for (auto Block : SortedEntries) @@ -403,31 +413,33 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( } // Record if each entry has a layout predecessor. This map stores - // <<Predecessor is within the loop?, loop entry>, layout predecessor> - std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> + // <<loop entry, Predecessor is within the loop?>, layout predecessor> + DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *> EntryToLayoutPred; - for (auto *Pred : AllPreds) + for (auto *Pred : AllPreds) { + bool PredInLoop = InLoop.count(Pred); for (auto *Entry : Pred->successors()) if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry)) - EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred; + EntryToLayoutPred[{Entry, PredInLoop}] = Pred; + } // We need to create at most two routing blocks per entry: one for // predecessors outside the loop and one for predecessors inside the loop. // This map stores - // <<Predecessor is within the loop?, loop entry>, routing block> - std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map; + // <<loop entry, Predecessor is within the loop?>, routing block> + DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *> + Map; for (auto *Pred : AllPreds) { bool PredInLoop = InLoop.count(Pred); for (auto *Entry : Pred->successors()) { - if (!Entries.count(Entry) || - Map.count(std::make_pair(InLoop.count(Pred), Entry))) + if (!Entries.count(Entry) || Map.count({Entry, PredInLoop})) continue; // If there exists a layout predecessor of this entry and this predecessor // is not that, we rather create a routing block after that layout // predecessor to save a branch. - if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) && - EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred) - continue; + if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop})) + if (OtherPred != Pred) + continue; // This is a successor we need to rewrite. 
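Before the routing-block creation below, a note on the key type this hunk switched to: a DenseMap keyed by PointerIntPair packs the bool into an otherwise-unused low bit of the aligned pointer, so the key stays pointer-sized and hashes cheaply. A hand-rolled standalone illustration; llvm::PointerIntPair adds compile-time alignment checks on top of this idea.

#include <cstdint>
#include <iostream>

struct BlockFlagKey {
  uintptr_t Bits;
  BlockFlagKey(const void *P, bool B)
      : Bits(reinterpret_cast<uintptr_t>(P) | uintptr_t(B)) {}
  const void *pointer() const {
    return reinterpret_cast<const void *>(Bits & ~uintptr_t(1));
  }
  bool flag() const { return Bits & 1; }
};

int main() {
  alignas(4) static int Block; // stand-in for an aligned MachineBasicBlock
  BlockFlagKey InLoop(&Block, true), Outside(&Block, false);
  std::cout << InLoop.flag() << ' ' << Outside.flag() << '\n'; // 1 0
  std::cout << (InLoop.pointer() == &Block) << '\n';           // 1
}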
MachineBasicBlock *Routing = MF.CreateMachineBasicBlock(); @@ -443,7 +455,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( .addImm(Indices[Entry]); BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); Routing->addSuccessor(Dispatch); - Map[std::make_pair(PredInLoop, Entry)] = Routing; + Map[{Entry, PredInLoop}] = Routing; } } @@ -453,12 +465,12 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( for (MachineInstr &Term : Pred->terminators()) for (auto &Op : Term.explicit_uses()) if (Op.isMBB() && Indices.count(Op.getMBB())) - Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]); + Op.setMBB(Map[{Op.getMBB(), PredInLoop}]); for (auto *Succ : Pred->successors()) { if (!Entries.count(Succ)) continue; - auto *Routing = Map[std::make_pair(PredInLoop, Succ)]; + auto *Routing = Map[{Succ, PredInLoop}]; Pred->replaceSuccessor(Succ, Routing); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 71eeebfada4bc..95669932e73f5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -19,6 +19,7 @@ #include "WebAssemblyFrameLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyInstrInfo.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" @@ -86,8 +87,8 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame( } // In function with EH pads, we need to make a copy of the value of -// __stack_pointer global in SP32 register, in order to use it when restoring -// __stack_pointer after an exception is caught. +// __stack_pointer global in SP32/64 register, in order to use it when +// restoring __stack_pointer after an exception is caught. bool WebAssemblyFrameLowering::needsPrologForEH( const MachineFunction &MF) const { auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType(); @@ -122,6 +123,57 @@ bool WebAssemblyFrameLowering::needsSPWriteback( return needsSPForLocalFrame(MF) && !CanUseRedZone; } +unsigned WebAssemblyFrameLowering::getSPReg(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::SP64 + : WebAssembly::SP32; +} + +unsigned WebAssemblyFrameLowering::getFPReg(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::FP64 + : WebAssembly::FP32; +} + +unsigned +WebAssemblyFrameLowering::getOpcConst(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::CONST_I64 + : WebAssembly::CONST_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcAdd(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::ADD_I64 + : WebAssembly::ADD_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcSub(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::SUB_I64 + : WebAssembly::SUB_I32; +} + +unsigned WebAssemblyFrameLowering::getOpcAnd(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::AND_I64 + : WebAssembly::AND_I32; +} + +unsigned +WebAssemblyFrameLowering::getOpcGlobGet(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? 
WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; +} + +unsigned +WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) { + return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() + ? WebAssembly::GLOBAL_SET_I64 + : WebAssembly::GLOBAL_SET_I32; +} + void WebAssemblyFrameLowering::writeSPToGlobal( unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { @@ -129,7 +181,8 @@ void WebAssemblyFrameLowering::writeSPToGlobal( const char *ES = "__stack_pointer"; auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32)) + + BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) .addExternalSymbol(SPSymbol) .addReg(SrcReg); } @@ -140,11 +193,12 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( MachineBasicBlock::iterator I) const { assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && "Call frame pseudos should only be used for dynamic stack adjustment"); - const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &ST = MF.getSubtarget<WebAssemblySubtarget>(); + const auto *TII = ST.getInstrInfo(); if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF)) { DebugLoc DL = I->getDebugLoc(); - writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL); + writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL); } return MBB.erase(I); } @@ -160,7 +214,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, return; uint64_t StackSize = MFI.getStackSize(); - const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &ST = MF.getSubtarget<WebAssemblySubtarget>(); + const auto *TII = ST.getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.begin(); @@ -171,13 +226,13 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned SPReg = WebAssembly::SP32; + unsigned SPReg = getSPReg(MF); if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); const char *ES = "__stack_pointer"; auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) .addExternalSymbol(SPSymbol); bool HasBP = hasBP(MF); @@ -191,34 +246,30 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize) { // Subtract the frame size Register OffsetReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg) .addImm(StackSize); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), - WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcSub(MF)), getSPReg(MF)) .addReg(SPReg) .addReg(OffsetReg); } if (HasBP) { Register BitmaskReg = MRI.createVirtualRegister(PtrRC); - unsigned Alignment = MFI.getMaxAlignment(); - assert((1u << countTrailingZeros(Alignment)) == Alignment && - "Alignment must be a power of 2"); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg) - .addImm((int)~(Alignment - 1)); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), - WebAssembly::SP32) - .addReg(WebAssembly::SP32) + Align Alignment = MFI.getMaxAlign(); + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), BitmaskReg) + .addImm((int64_t) ~(Alignment.value() - 1)); + BuildMI(MBB, InsertPt, DL, 
TII->get(getOpcAnd(MF)), getSPReg(MF)) + .addReg(getSPReg(MF)) .addReg(BitmaskReg); } if (hasFP(MF)) { // Unlike most conventional targets (where FP points to the saved FP), // FP points to the bottom of the fixed-size locals, so we can use positive // offsets in load/store instructions. - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) - .addReg(WebAssembly::SP32); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), getFPReg(MF)) + .addReg(getSPReg(MF)); } if (StackSize && needsSPWriteback(MF)) { - writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL); + writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL); } } @@ -227,7 +278,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, uint64_t StackSize = MF.getFrameInfo().getStackSize(); if (!needsSP(MF) || !needsSPWriteback(MF)) return; - const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &ST = MF.getSubtarget<WebAssemblySubtarget>(); + const auto *TII = ST.getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.getFirstTerminator(); DebugLoc DL; @@ -238,6 +290,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, // Restore the stack pointer. If we had fixed-size locals, add the offset // subtracted in the prolog. unsigned SPReg = 0; + unsigned SPFPReg = hasFP(MF) ? getFPReg(MF) : getSPReg(MF); if (hasBP(MF)) { auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); SPReg = FI->getBasePointerVreg(); @@ -245,17 +298,34 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); Register OffsetReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg) .addImm(StackSize); - // In the epilog we don't need to write the result back to the SP32 physreg - // because it won't be used again. We can use a stackified register instead. + // In the epilog we don't need to write the result back to the SP32/64 + // physreg because it won't be used again. We can use a stackified register + // instead. SPReg = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) - .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(getOpcAdd(MF)), SPReg) + .addReg(SPFPReg) .addReg(OffsetReg); } else { - SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; + SPReg = SPFPReg; } writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); } + +TargetFrameLowering::DwarfFrameBase +WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { + DwarfFrameBase Loc; + Loc.Kind = DwarfFrameBase::WasmFrameBase; + const WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + if (needsSP(MF) && MFI.isFrameBaseVirtual()) { + unsigned LocalNum = MFI.getFrameBaseLocal(); + Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL, LocalNum}; + } else { + // TODO: This should work on a breakpoint at a function with no frame, + // but probably won't work for traversing up the stack. 
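Returning to the prologue's base-pointer masking above: clearing the low bits with ~(Alignment - 1) is the usual align-down trick for a power-of-two alignment. Worked standalone:

#include <cstdint>
#include <iostream>

int main() {
  uint64_t SP = 0x1003F;  // unaligned stack pointer
  uint64_t Align = 16;    // MFI.getMaxAlign(), assumed a power of two
  uint64_t AlignedSP = SP & ~(Align - 1);
  std::cout << std::hex << "0x" << AlignedSP << '\n'; // 0x10030
}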
+ Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_RELOC, 0}; + } + return Loc; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index fdc0f561dcd96..e16f639ff22b0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -18,7 +18,6 @@ #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { -class MachineFrameInfo; class WebAssemblyFrameLowering final : public TargetFrameLowering { public: @@ -44,6 +43,7 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override; bool needsPrologForEH(const MachineFunction &MF) const; @@ -53,6 +53,15 @@ public: MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const; + static unsigned getSPReg(const MachineFunction &MF); + static unsigned getFPReg(const MachineFunction &MF); + static unsigned getOpcConst(const MachineFunction &MF); + static unsigned getOpcAdd(const MachineFunction &MF); + static unsigned getOpcSub(const MachineFunction &MF); + static unsigned getOpcAnd(const MachineFunction &MF); + static unsigned getOpcGlobGet(const MachineFunction &MF); + static unsigned getOpcGlobSet(const MachineFunction &MF); + private: bool hasBP(const MachineFunction &MF) const; bool needsSPForLocalFrame(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index ba04fd4eb9dd2..dee1c4e281494 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -13,8 +13,7 @@ // NOTE: NO INCLUDE GUARD DESIRED! -HANDLE_NODETYPE(CALL1) -HANDLE_NODETYPE(CALL0) +HANDLE_NODETYPE(CALL) HANDLE_NODETYPE(RET_CALL) HANDLE_NODETYPE(RETURN) HANDLE_NODETYPE(ARGUMENT) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 531a07b829c85..d1a696f854f8b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -53,11 +53,6 @@ public: Subtarget = &MF.getSubtarget<WebAssemblySubtarget>(); - // Wasm64 is not fully supported right now (and is not specified) - if (Subtarget->hasAddr64()) - report_fatal_error( - "64-bit WebAssembly (wasm64) is not currently supported"); - return SelectionDAGISel::runOnMachineFunction(MF); } @@ -82,6 +77,13 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { return; } + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; + auto ConstIns = + PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; + auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32; + // Few custom selection stuff. 
SDLoc DL(Node); MachineFunction &MF = CurDAG->getMachineFunction(); @@ -145,20 +147,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { false); } - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT); SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress( GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0); - MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, - DL, MVT::i32, TLSBaseSym); - MachineSDNode *TLSOffset = CurDAG->getMachineNode( - WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym); - MachineSDNode *TLSAddress = - CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32, - SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); + MachineSDNode *TLSBase = + CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym); + MachineSDNode *TLSOffset = + CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym); + MachineSDNode *TLSAddress = CurDAG->getMachineNode( + AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); ReplaceNode(Node, TLSAddress); return; } @@ -167,22 +165,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); switch (IntNo) { case Intrinsic::wasm_tls_size: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSSize = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, PtrVT, - CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32)); + GlobalGetIns, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_size", PtrVT)); ReplaceNode(Node, TLSSize); return; } case Intrinsic::wasm_tls_align: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSAlign = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, PtrVT, - CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32)); + GlobalGetIns, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_align", PtrVT)); ReplaceNode(Node, TLSAlign); return; } @@ -193,11 +185,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); switch (IntNo) { case Intrinsic::wasm_tls_base: { - MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); - assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); - MachineSDNode *TLSBase = CurDAG->getMachineNode( - WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other, + GlobalGetIns, DL, PtrVT, MVT::Other, CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), Node->getOperand(0)); ReplaceNode(Node, TLSBase); @@ -206,6 +195,35 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { } break; } + case WebAssemblyISD::CALL: + case WebAssemblyISD::RET_CALL: { + // CALL has both variable operands and variable results, but ISel only + // supports one or the other. Split calls into two nodes glued together, one + // for the operands and one for the results. These two nodes will be + // recombined in a custom inserter hook into a single MachineInstr. 
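Before the node-building code below, a rough standalone model of the pairing described in the comment above may help. It is plain C++ with illustrative names (ToyNode and fuseCall are not LLVM API): a glue edge ties the operand-only node to the result-only node until a later step merges them back into a single record, much as the custom inserter later does for CALL_PARAMS/CALL_RESULTS.

#include <cassert>
#include <string>
#include <vector>

// Toy model: ISel cannot express one node with both variadic inputs and
// variadic outputs, so the call is split into a "params" node carrying the
// operands (producing only a glue token) and a "results" node carrying the
// defs (consuming that glue). A later step re-fuses the glued pair.
struct ToyNode {
  std::string Opcode;
  std::vector<std::string> Operands;
  std::vector<std::string> Results;
  ToyNode *Glued = nullptr; // results node points at its params node
};

// Mirrors the recombination: one fused call carrying both lists.
static ToyNode fuseCall(const ToyNode &ResultsNode) {
  assert(ResultsNode.Glued && "results node must be glued to a params node");
  ToyNode Call;
  Call.Opcode = "call";
  Call.Operands = ResultsNode.Glued->Operands;
  Call.Results = ResultsNode.Results;
  return Call;
}

int main() {
  ToyNode Params{"call_params", {"$callee", "$arg0", "$arg1"}, {}, nullptr};
  ToyNode Rets{"call_results", {}, {"$ret0", "$ret1"}, &Params};
  ToyNode Fused = fuseCall(Rets);
  assert(Fused.Operands.size() == 3 && Fused.Results.size() == 2);
}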
+ SmallVector<SDValue, 16> Ops; + for (size_t i = 1; i < Node->getNumOperands(); ++i) { + SDValue Op = Node->getOperand(i); + if (i == 1 && Op->getOpcode() == WebAssemblyISD::Wrapper) + Op = Op->getOperand(0); + Ops.push_back(Op); + } + + // Add the chain last + Ops.push_back(Node->getOperand(0)); + MachineSDNode *CallParams = + CurDAG->getMachineNode(WebAssembly::CALL_PARAMS, DL, MVT::Glue, Ops); + + unsigned Results = Node->getOpcode() == WebAssemblyISD::CALL + ? WebAssembly::CALL_RESULTS + : WebAssembly::RET_CALL_RESULTS; + + SDValue Link(CallParams, 0); + MachineSDNode *CallResults = + CurDAG->getMachineNode(Results, DL, Node->getVTList(), Link); + ReplaceNode(Node, CallResults); + return; + } default: break; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 5b177c0c5d9d5..a9b9eceb41304 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -61,8 +61,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); - } - if (Subtarget->hasUnimplementedSIMD128()) { addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); } @@ -116,97 +114,81 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( for (auto T : {MVT::i32, MVT::i64}) setOperationAction(Op, T, Expand); if (Subtarget->hasSIMD128()) - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) setOperationAction(Op, T, Expand); - if (Subtarget->hasUnimplementedSIMD128()) - setOperationAction(Op, MVT::v2i64, Expand); } // SIMD-specific configuration if (Subtarget->hasSIMD128()) { + // Hoist bitcasts out of shuffles + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + // Support saturating add for i8x16 and i16x8 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) setOperationAction(Op, T, Legal); + // Support integer abs + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(ISD::ABS, T, Legal); + // Custom lower BUILD_VECTORs to minimize number of replace_lanes - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) setOperationAction(ISD::BUILD_VECTOR, T, Custom); - if (Subtarget->hasUnimplementedSIMD128()) - for (auto T : {MVT::v2i64, MVT::v2f64}) - setOperationAction(ISD::BUILD_VECTOR, T, Custom); // We have custom shuffle lowering to expose the shuffle mask - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); - if (Subtarget->hasUnimplementedSIMD128()) - for (auto T: {MVT::v2i64, MVT::v2f64}) - setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); // Custom lowering since wasm shifts must have a scalar shift amount - for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) { - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) setOperationAction(Op, T, Custom); - if (Subtarget->hasUnimplementedSIMD128()) - setOperationAction(Op, MVT::v2i64, Custom); - } // Custom lower lane 
accesses to expand out variable indices - for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) { - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) setOperationAction(Op, T, Custom); - if (Subtarget->hasUnimplementedSIMD128()) - for (auto T : {MVT::v2i64, MVT::v2f64}) - setOperationAction(Op, T, Custom); - } - // There is no i64x2.mul instruction - setOperationAction(ISD::MUL, MVT::v2i64, Expand); + // There is no i8x16.mul instruction + setOperationAction(ISD::MUL, MVT::v16i8, Expand); // There are no vector select instructions - for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) { - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) setOperationAction(Op, T, Expand); - if (Subtarget->hasUnimplementedSIMD128()) - for (auto T : {MVT::v2i64, MVT::v2f64}) - setOperationAction(Op, T, Expand); - } // Expand integer operations supported for scalars but not SIMD for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, - ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) { - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) setOperationAction(Op, T, Expand); - if (Subtarget->hasUnimplementedSIMD128()) - setOperationAction(Op, MVT::v2i64, Expand); - } // But we do have integer min and max operations - if (Subtarget->hasUnimplementedSIMD128()) { - for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) - setOperationAction(Op, T, Legal); - } + for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(Op, T, Legal); // Expand float operations supported for scalars but not SIMD for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2, ISD::FRINT}) { - setOperationAction(Op, MVT::v4f32, Expand); - if (Subtarget->hasUnimplementedSIMD128()) - setOperationAction(Op, MVT::v2f64, Expand); - } + ISD::FEXP, ISD::FEXP2, ISD::FRINT}) + for (auto T : {MVT::v4f32, MVT::v2f64}) + setOperationAction(Op, T, Expand); // Expand operations not supported for i64x2 vectors - if (Subtarget->hasUnimplementedSIMD128()) - for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC) - setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom); - - // Expand additional SIMD ops that V8 hasn't implemented yet - if (!Subtarget->hasUnimplementedSIMD128()) { - setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); - setOperationAction(ISD::FDIV, MVT::v4f32, Expand); - } + for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC) + setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom); + + // 64x2 conversions are not in the spec + for (auto Op : + {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(Op, T, Expand); } // As a special case, these operators use the type to mean the type to @@ -227,6 +209,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); 
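Because the 64x2 conversions above are marked Expand, the legalizer falls back to converting each 64-bit lane individually with scalar instructions. A minimal standalone sketch of what that scalarization computes, in plain C++ (sintToFpV2I64 is an illustrative name, not an LLVM helper):

#include <array>
#include <cstdint>
#include <cstdio>

// What "Expand" means for e.g. sint_to_fp on v2i64: there is no SIMD
// instruction for it, so each 64-bit lane is converted on its own.
static std::array<double, 2> sintToFpV2I64(const std::array<int64_t, 2> &V) {
  return {static_cast<double>(V[0]), static_cast<double>(V[1])};
}

int main() {
  auto R = sintToFpV2I64({-1, 42});
  std::printf("%f %f\n", R[0], R[1]); // -1.000000 42.000000
}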
setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i64, Custom); setOperationAction(ISD::CopyToReg, MVT::Other, Custom); // Expand these forms; we pattern-match the forms that we can handle in isel. @@ -259,12 +242,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( } } // But some vector extending loads are legal - if (Subtarget->hasUnimplementedSIMD128()) { - for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { - setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); - } + for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); } } @@ -273,6 +254,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // Exception handling intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -434,6 +416,58 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, return DoneMBB; } +static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults, + DebugLoc DL, MachineBasicBlock *BB, + const TargetInstrInfo &TII) { + MachineInstr &CallParams = *CallResults.getPrevNode(); + assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS); + assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS || + CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS); + + bool IsIndirect = CallParams.getOperand(0).isReg(); + bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS; + + unsigned CallOp; + if (IsIndirect && IsRetCall) { + CallOp = WebAssembly::RET_CALL_INDIRECT; + } else if (IsIndirect) { + CallOp = WebAssembly::CALL_INDIRECT; + } else if (IsRetCall) { + CallOp = WebAssembly::RET_CALL; + } else { + CallOp = WebAssembly::CALL; + } + + MachineFunction &MF = *BB->getParent(); + const MCInstrDesc &MCID = TII.get(CallOp); + MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL)); + + // Move the function pointer to the end of the arguments for indirect calls + if (IsIndirect) { + auto FnPtr = CallParams.getOperand(0); + CallParams.RemoveOperand(0); + CallParams.addOperand(FnPtr); + } + + for (auto Def : CallResults.defs()) + MIB.add(Def); + + // Add placeholders for the type index and immediate flags + if (IsIndirect) { + MIB.addImm(0); + MIB.addImm(0); + } + + for (auto Use : CallParams.uses()) + MIB.add(Use); + + BB->insert(CallResults.getIterator(), MIB); + CallParams.eraseFromParent(); + CallResults.eraseFromParent(); + + return BB; +} + MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); @@ -466,7 +500,9 @@ MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( case WebAssembly::FP_TO_UINT_I64_F64: return LowerFPToInt(MI, DL, BB, TII, true, true, true, WebAssembly::I64_TRUNC_U_F64); - llvm_unreachable("Unexpected instruction to emit with custom inserter"); + case WebAssembly::CALL_RESULTS: + case WebAssembly::RET_CALL_RESULTS: + return LowerCallResults(MI, DL, BB, TII); } } @@ -565,8 +601,6 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, } bool 
WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { - if (!Subtarget->hasUnimplementedSIMD128()) - return false; MVT ExtT = ExtVal.getSimpleValueType(); MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0); return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || @@ -580,7 +614,11 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, if (VT.isVector()) return VT.changeVectorElementTypeToInteger(); - return TargetLowering::getSetCCResultType(DL, C, VT); + // So far, all branch instructions in Wasm take an I32 condition. + // The default TargetLowering::getSetCCResultType returns the pointer size, + // which would be useful to reduce instruction counts when testing + // against 64-bit pointers/values if at some point Wasm supports that. + return EVT::getIntegerVT(C, 32); } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, @@ -648,7 +686,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) { CallConv == CallingConv::PreserveMost || CallConv == CallingConv::PreserveAll || CallConv == CallingConv::CXX_FAST_TLS || - CallConv == CallingConv::WASM_EmscriptenInvoke; + CallConv == CallingConv::WASM_EmscriptenInvoke || + CallConv == CallingConv::Swift; } SDValue @@ -670,41 +709,57 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, fail(DL, DAG, "WebAssembly doesn't support patch point yet"); if (CLI.IsTailCall) { - bool MustTail = CLI.CS && CLI.CS.isMustTailCall(); - if (Subtarget->hasTailCall() && !CLI.IsVarArg) { - // Do not tail call unless caller and callee return types match - const Function &F = MF.getFunction(); - const TargetMachine &TM = getTargetMachine(); - Type *RetTy = F.getReturnType(); - SmallVector<MVT, 4> CallerRetTys; - SmallVector<MVT, 4> CalleeRetTys; - computeLegalValueVTs(F, TM, RetTy, CallerRetTys); - computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); - bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && - std::equal(CallerRetTys.begin(), CallerRetTys.end(), - CalleeRetTys.begin()); - if (!TypesMatch) { - // musttail in this case would be an LLVM IR validation failure - assert(!MustTail); - CLI.IsTailCall = false; - } - } else { + auto NoTail = [&](const char *Msg) { + if (CLI.CB && CLI.CB->isMustTailCall()) + fail(DL, DAG, Msg); CLI.IsTailCall = false; - if (MustTail) { - if (CLI.IsVarArg) { - // The return would pop the argument buffer - fail(DL, DAG, "WebAssembly does not support varargs tail calls"); - } else { - fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); + }; + + if (!Subtarget->hasTailCall()) + NoTail("WebAssembly 'tail-call' feature not enabled"); + + // Varargs calls cannot be tail calls because the buffer is on the stack + if (CLI.IsVarArg) + NoTail("WebAssembly does not support varargs tail calls"); + + // Do not tail call unless caller and callee return types match + const Function &F = MF.getFunction(); + const TargetMachine &TM = getTargetMachine(); + Type *RetTy = F.getReturnType(); + SmallVector<MVT, 4> CallerRetTys; + SmallVector<MVT, 4> CalleeRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); + bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && + std::equal(CallerRetTys.begin(), CallerRetTys.end(), + CalleeRetTys.begin()); + if (!TypesMatch) + NoTail("WebAssembly tail call requires caller and callee return types to " + "match"); + + // If pointers to local stack values are passed, we cannot tail call + if (CLI.CB) { + for (auto &Arg : 
CLI.CB->args()) { + Value *Val = Arg.get(); + // Trace the value back through pointer operations + while (true) { + Value *Src = Val->stripPointerCastsAndAliases(); + if (auto *GEP = dyn_cast<GetElementPtrInst>(Src)) + Src = GEP->getPointerOperand(); + if (Val == Src) + break; + Val = Src; + } + if (isa<AllocaInst>(Val)) { + NoTail( + "WebAssembly does not support tail calling with stack arguments"); + break; + } } } } SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; - if (Ins.size() > 1) - fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); - SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; @@ -717,10 +772,14 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, std::swap(OutVals[0], OutVals[1]); } + bool HasSwiftSelfArg = false; + bool HasSwiftErrorArg = false; unsigned NumFixedArgs = 0; for (unsigned I = 0; I < Outs.size(); ++I) { const ISD::OutputArg &Out = Outs[I]; SDValue &OutVal = OutVals[I]; + HasSwiftSelfArg |= Out.Flags.isSwiftSelf(); + HasSwiftErrorArg |= Out.Flags.isSwiftError(); if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) @@ -732,13 +791,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { auto &MFI = MF.getFrameInfo(); int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), - Out.Flags.getByValAlign(), + Out.Flags.getNonZeroByValAlign(), /*isSS=*/false); SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); Chain = DAG.getMemcpy( - Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), + Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(), /*isVolatile*/ false, /*AlwaysInline=*/false, /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); OutVal = FINode; @@ -750,6 +809,29 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsVarArg = CLI.IsVarArg; auto PtrVT = getPointerTy(Layout); + // For swiftcc, emit additional swiftself and swifterror arguments if there + // aren't any. These additional arguments are also added to the callee + // signature; they are necessary to match caller and callee signatures for + // indirect calls. + if (CallConv == CallingConv::Swift) { + if (!HasSwiftSelfArg) { + NumFixedArgs++; + ISD::OutputArg Arg; + Arg.Flags.setSwiftSelf(); + CLI.Outs.push_back(Arg); + SDValue ArgVal = DAG.getUNDEF(PtrVT); + CLI.OutVals.push_back(ArgVal); + } + if (!HasSwiftErrorArg) { + NumFixedArgs++; + ISD::OutputArg Arg; + Arg.Flags.setSwiftError(); + CLI.Outs.push_back(Arg); + SDValue ArgVal = DAG.getUNDEF(PtrVT); + CLI.OutVals.push_back(ArgVal); + } + } + // Analyze operands of the call, assigning locations to each operand.
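To make the swiftcc padding just above concrete: a call that lacks explicit swiftself or swifterror values still gets placeholder undef arguments appended, so direct and indirect callees observe the same signature. A toy model in plain C++ (ToyArg and padSwiftArgs are illustrative names, assuming this simplified view of the argument flags):

#include <cassert>
#include <string>
#include <vector>

struct ToyArg {
  std::string Value;
  bool IsSwiftSelf = false;
  bool IsSwiftError = false;
};

// Mirrors the logic above: scan the outgoing args, and if no swiftself or
// swifterror argument is present, append an undef placeholder so caller
// and callee signatures agree for indirect calls.
static void padSwiftArgs(std::vector<ToyArg> &Outs) {
  bool HasSelf = false, HasError = false;
  for (const ToyArg &A : Outs) {
    HasSelf |= A.IsSwiftSelf;
    HasError |= A.IsSwiftError;
  }
  if (!HasSelf)
    Outs.push_back({"undef", /*IsSwiftSelf=*/true, /*IsSwiftError=*/false});
  if (!HasError)
    Outs.push_back({"undef", /*IsSwiftSelf=*/false, /*IsSwiftError=*/true});
}

int main() {
  std::vector<ToyArg> Outs = {{"%x"}, {"%y"}};
  padSwiftArgs(Outs);
  assert(Outs.size() == 4); // two placeholders appended
}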
SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); @@ -763,10 +845,10 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - unsigned Align = std::max(Out.Flags.getOrigAlign(), - Layout.getABITypeAlignment(Ty)); - unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), - Align); + Align Alignment = + std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty)); + unsigned Offset = + CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); @@ -838,7 +920,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); - // Ignore In.getOrigAlign() because all our arguments are passed in + // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in // registers. InTys.push_back(In.VT); } @@ -851,17 +933,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, InTys.push_back(MVT::Other); SDVTList InTyList = DAG.getVTList(InTys); - SDValue Res = - DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, - DL, InTyList, Ops); - if (Ins.empty()) { - Chain = Res; - } else { - InVals.push_back(Res); - Chain = Res.getValue(1); - } + SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops); - return Chain; + for (size_t I = 0; I < Ins.size(); ++I) + InVals.push_back(Res.getValue(I)); + + // Return the chain + return Res.getValue(Ins.size()); } bool WebAssemblyTargetLowering::CanLowerReturn( @@ -916,7 +994,11 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // of the incoming values before they're represented by virtual registers. MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); + bool HasSwiftErrorArg = false; + bool HasSwiftSelfArg = false; for (const ISD::InputArg &In : Ins) { + HasSwiftSelfArg |= In.Flags.isSwiftSelf(); + HasSwiftErrorArg |= In.Flags.isSwiftError(); if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (In.Flags.isNest()) @@ -925,7 +1007,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); - // Ignore In.getOrigAlign() because all our arguments are passed in + // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in // registers. InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, DAG.getTargetConstant(InVals.size(), @@ -936,6 +1018,19 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( MFI->addParam(In.VT); } + // For swiftcc, emit additional swiftself and swifterror arguments if there + // aren't any. These additional arguments are also added to the callee + // signature; they are necessary to match caller and callee signatures for + // indirect calls. + auto PtrVT = getPointerTy(MF.getDataLayout()); + if (CallConv == CallingConv::Swift) { + if (!HasSwiftSelfArg) { + MFI->addParam(PtrVT); + } + if (!HasSwiftErrorArg) { + MFI->addParam(PtrVT); + } + } // Varargs are copied into a buffer allocated by the caller, and a pointer to // the buffer is passed as an argument.
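The varargs convention restated above, which the IsVarArg block below consumes, can be emulated directly: the caller materializes a buffer holding the variadic values and passes a single pointer to it. A sketch in plain C++, simplified to same-typed slots (the real lowering honors each value's size and alignment):

#include <cstdint>
#include <cstdio>
#include <vector>

// "Callee side": receives a pointer to the caller-allocated varargs buffer
// and walks it, which is what va_arg lowers to on this target.
static int64_t sumVarArgs(const int64_t *Buf, int N) {
  int64_t S = 0;
  for (int I = 0; I < N; ++I)
    S += Buf[I];
  return S;
}

int main() {
  // "Caller side": copy the variadic operands into a buffer, pass its address.
  std::vector<int64_t> Buffer = {1, 2, 3};
  std::printf("%lld\n", (long long)sumVarArgs(Buffer.data(), 3)); // 6
}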
if (IsVarArg) { @@ -953,8 +1048,8 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // Record the number and types of arguments and results. SmallVector<MVT, 4> Params; SmallVector<MVT, 4> Results; - computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(), - DAG.getTarget(), Params, Results); + computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(), + MF.getFunction(), DAG.getTarget(), Params, Results); for (MVT VT : Results) MFI->addResult(VT); // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify @@ -1190,11 +1285,10 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB)); - // TODO: For now, we just pick something arbitrary for a default case for now. - // We really want to sniff out the guard and put in the real default case (and - // delete the guard). - Ops.push_back(DAG.getBasicBlock(MBBs[0])); - + // Add the first MBB as a dummy default target for now. This will be replaced + // with the proper default target (and the preceding range check eliminated) + // if possible by WebAssemblyFixBrTableDefaults. + Ops.push_back(DAG.getBasicBlock(*MBBs.begin())); return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); } @@ -1262,6 +1356,24 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, Op.getOperand(3) // thrown value }); } + + case Intrinsic::wasm_shuffle: { + // Drop in-chain and replace undefs, but otherwise pass through unchanged + SDValue Ops[18]; + size_t OpIdx = 0; + Ops[OpIdx++] = Op.getOperand(1); + Ops[OpIdx++] = Op.getOperand(2); + while (OpIdx < 18) { + const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); + if (MaskIdx.isUndef() || + cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) { + Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32); + } else { + Ops[OpIdx++] = MaskIdx; + } + } + return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); + } } } @@ -1270,39 +1382,42 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // If sign extension operations are disabled, allow sext_inreg only if operand - // is a vector extract. SIMD does not depend on sign extension operations, but - // allowing sext_inreg in this context lets us have simple patterns to select - // extract_lane_s instructions. Expanding sext_inreg everywhere would be - // simpler in this file, but would necessitate large and brittle patterns to - // undo the expansion and select extract_lane_s instructions. + // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign + // extension operations, but allowing sext_inreg in this context lets us have + // simple patterns to select extract_lane_s instructions. Expanding sext_inreg + // everywhere would be simpler in this file, but would necessitate large and + // brittle patterns to undo the expansion and select extract_lane_s + // instructions. 
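The bitcast-and-rescale trick implemented below is plain index arithmetic: viewing the same 128 bits with narrower lanes multiplies the lane count, so lane I of the wide vector corresponds to lane I * Scale of the narrow vector. A standalone check of that arithmetic in plain C++ (assumes a little-endian host, matching WebAssembly's byte order):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // sext_inreg(extract_lane v4i32, lane 2, i8) is rewritten as an
  // extract_lane_s of the same bytes viewed as v16i8, at lane 2 * (16/4) = 8.
  uint32_t V4[4] = {0, 0, 0x00000080u, 0}; // lane 2 has low byte 0x80
  uint8_t Bytes[16];
  std::memcpy(Bytes, V4, 16);
  unsigned Scale = 16 / 4, NewIndex = 2 * Scale;
  int8_t Lane = (int8_t)Bytes[NewIndex]; // little-endian: low byte comes first
  assert((int32_t)Lane == -128);         // same value sext_inreg would produce
}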
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); - if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - const SDValue &Extract = Op.getOperand(0); - MVT VecT = Extract.getOperand(0).getSimpleValueType(); - MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode()) - ->getVT() - .getSimpleVT(); - MVT ExtractedVecT = - MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); - if (ExtractedVecT == VecT) - return Op; - // Bitcast vector to appropriate type to ensure ISel pattern coverage - const SDValue &Index = Extract.getOperand(1); - unsigned IndexVal = - static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue(); - unsigned Scale = - ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); - assert(Scale > 1); - SDValue NewIndex = - DAG.getConstant(IndexVal * Scale, DL, Index.getValueType()); - SDValue NewExtract = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), - DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), - NewExtract, Op.getOperand(1)); - } - // Otherwise expand - return SDValue(); + if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + const SDValue &Extract = Op.getOperand(0); + MVT VecT = Extract.getOperand(0).getSimpleValueType(); + if (VecT.getVectorElementType().getSizeInBits() > 32) + return SDValue(); + MVT ExtractedLaneT = + cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT(); + MVT ExtractedVecT = + MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); + if (ExtractedVecT == VecT) + return Op; + + // Bitcast vector to appropriate type to ensure ISel pattern coverage + const SDNode *Index = Extract.getOperand(1).getNode(); + if (!isa<ConstantSDNode>(Index)) + return SDValue(); + unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue(); + unsigned Scale = + ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); + assert(Scale > 1); + SDValue NewIndex = + DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0)); + SDValue NewExtract = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), + DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, + Op.getOperand(1)); } SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, @@ -1311,7 +1426,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, const EVT VecT = Op.getValueType(); const EVT LaneT = Op.getOperand(0).getValueType(); const size_t Lanes = Op.getNumOperands(); - bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8; + bool CanSwizzle = VecT == MVT::v16i8; // BUILD_VECTORs are lowered to the instruction that initializes the highest // possible number of lanes at once followed by a sequence of replace_lane @@ -1410,38 +1525,37 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, // original instruction std::function<bool(size_t, const SDValue &)> IsLaneConstructed; SDValue Result; - if (Subtarget->hasUnimplementedSIMD128()) { - // Prefer swizzles over vector consts over splats - if (NumSwizzleLanes >= NumSplatLanes && - NumSwizzleLanes >= NumConstantLanes) { - Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, - SwizzleIndices); - auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); - IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { - return Swizzled == 
GetSwizzleSrcs(I, Lane); - }; - } else if (NumConstantLanes >= NumSplatLanes) { - SmallVector<SDValue, 16> ConstLanes; - for (const SDValue &Lane : Op->op_values()) { - if (IsConstant(Lane)) { - ConstLanes.push_back(Lane); - } else if (LaneT.isFloatingPoint()) { - ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); - } else { - ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); - } + // Prefer swizzles over vector consts over splats + if (NumSwizzleLanes >= NumSplatLanes && + (!Subtarget->hasUnimplementedSIMD128() || + NumSwizzleLanes >= NumConstantLanes)) { + Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, + SwizzleIndices); + auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); + IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { + return Swizzled == GetSwizzleSrcs(I, Lane); + }; + } else if (NumConstantLanes >= NumSplatLanes && + Subtarget->hasUnimplementedSIMD128()) { + SmallVector<SDValue, 16> ConstLanes; + for (const SDValue &Lane : Op->op_values()) { + if (IsConstant(Lane)) { + ConstLanes.push_back(Lane); + } else if (LaneT.isFloatingPoint()) { + ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); + } else { + ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); } - Result = DAG.getBuildVector(VecT, DL, ConstLanes); - IsLaneConstructed = [&](size_t _, const SDValue &Lane) { - return IsConstant(Lane); - }; } + Result = DAG.getBuildVector(VecT, DL, ConstLanes); + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; } if (!Result) { // Use a splat, but possibly a load_splat LoadSDNode *SplattedLoad; - if (Subtarget->hasUnimplementedSIMD128() && - (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && + if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { Result = DAG.getMemIntrinsicNode( WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT), @@ -1502,7 +1616,6 @@ SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op, // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes // (which return i64x2 results) as well. So instead we manually unroll i64x2 // comparisons here. 
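Concretely, the manual unrolling described above turns one v2i64 comparison into two scalar i64 comparisons whose boolean results are widened back into lane masks. A minimal standalone sketch in plain C++ (setccGtV2I64 is an illustrative name):

#include <array>
#include <cassert>
#include <cstdint>

// Each lane compares independently; a true lane becomes all-ones (-1),
// a false lane all-zeros, matching SIMD comparison semantics.
static std::array<int64_t, 2> setccGtV2I64(const std::array<int64_t, 2> &A,
                                           const std::array<int64_t, 2> &B) {
  return {A[0] > B[0] ? -1LL : 0, A[1] > B[1] ? -1LL : 0};
}

int main() {
  auto M = setccGtV2I64({5, -3}, {1, 7});
  assert(M[0] == -1 && M[1] == 0);
}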
- assert(Subtarget->hasUnimplementedSIMD128()); assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64); SmallVector<SDValue, 2> LHS, RHS; DAG.ExtractVectorElements(Op->getOperand(0), LHS); @@ -1536,22 +1649,25 @@ static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { return DAG.UnrollVectorOp(Op.getNode()); // Otherwise mask the shift value to get proper semantics from 32-bit shift SDLoc DL(Op); - SDValue ShiftVal = Op.getOperand(1); - uint64_t MaskVal = LaneT.getSizeInBits() - 1; - SDValue MaskedShiftVal = DAG.getNode( - ISD::AND, // mask opcode - DL, ShiftVal.getValueType(), // masked value type - ShiftVal, // original shift value operand - DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand - ); - - return DAG.UnrollVectorOp( - DAG.getNode(Op.getOpcode(), // original shift opcode - DL, Op.getValueType(), // original return type - Op.getOperand(0), // original vector operand, - MaskedShiftVal // new masked shift value operand - ) - .getNode()); + size_t NumLanes = Op.getSimpleValueType().getVectorNumElements(); + SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32); + unsigned ShiftOpcode = Op.getOpcode(); + SmallVector<SDValue, 16> ShiftedElements; + DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32); + SmallVector<SDValue, 16> ShiftElements; + DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32); + SmallVector<SDValue, 16> UnrolledOps; + for (size_t i = 0; i < NumLanes; ++i) { + SDValue MaskedShiftValue = + DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask); + SDValue ShiftedValue = ShiftedElements[i]; + if (ShiftOpcode == ISD::SRA) + ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, + ShiftedValue, DAG.getValueType(LaneT)); + UnrolledOps.push_back( + DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue)); + } + return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps); } SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, @@ -1561,19 +1677,13 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, // Only manually lower vector shifts assert(Op.getSimpleValueType().isVector()); - // Unroll non-splat vector shifts - BuildVectorSDNode *ShiftVec; - SDValue SplatVal; - if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) || - !(SplatVal = ShiftVec->getSplatValue())) + auto ShiftVal = DAG.getSplatValue(Op.getOperand(1)); + if (!ShiftVal) return unrollVectorShift(Op, DAG); - // All splats except i64x2 const splats are handled by patterns - auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal); - if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64) - return Op; + // Use anyext because none of the high bits can affect the shift + ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32); - // i64x2 const splats are custom lowered to avoid unnecessary wraps unsigned Opcode; switch (Op.getOpcode()) { case ISD::SHL: @@ -1588,11 +1698,45 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, default: llvm_unreachable("unexpected opcode"); } - APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32); - return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), - DAG.getConstant(Shift, DL, MVT::i32)); + + return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal); } //===----------------------------------------------------------------------===// -// WebAssembly Optimization Hooks +// Custom DAG combine hooks 
//===----------------------------------------------------------------------===// +static SDValue +performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + auto &DAG = DCI.DAG; + auto Shuffle = cast<ShuffleVectorSDNode>(N); + + // Hoist vector bitcasts that don't change the number of lanes out of unary + // shuffles, where they are less likely to get in the way of other combines. + // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) -> + // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask)))) + SDValue Bitcast = N->getOperand(0); + if (Bitcast.getOpcode() != ISD::BITCAST) + return SDValue(); + if (!N->getOperand(1).isUndef()) + return SDValue(); + SDValue CastOp = Bitcast.getOperand(0); + MVT SrcType = CastOp.getSimpleValueType(); + MVT DstType = Bitcast.getSimpleValueType(); + if (!SrcType.is128BitVector() || + SrcType.getVectorNumElements() != DstType.getVectorNumElements()) + return SDValue(); + SDValue NewShuffle = DAG.getVectorShuffle( + SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask()); + return DAG.getBitcast(DstType, NewShuffle); +} + +SDValue +WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: + return SDValue(); + case ISD::VECTOR_SHUFFLE: + return performVECTOR_SHUFFLECombine(N, DCI); + } +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 58e088a0ba503..b8e6123775292 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -39,7 +39,6 @@ enum NodeType : unsigned { } // end namespace WebAssemblyISD class WebAssemblySubtarget; -class WebAssemblyTargetMachine; class WebAssemblyTargetLowering final : public TargetLowering { public: @@ -119,6 +118,11 @@ private: SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + + // Custom DAG combine hooks + SDValue + PerformDAGCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) const override; }; namespace WebAssembly { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index a9a99d38f9f16..256b77e33db9e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -13,10 +13,11 @@ let UseNamedOperandTable = 1 in multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, - list<dag> pattern_r, string asmstr_r = "", - string asmstr_s = "", bits<32> atomic_op = -1> { + list<dag> pattern_r, string asmstr_r, + string asmstr_s, bits<32> atomic_op, + string is64 = "false"> { defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, - !or(0xfe00, !and(0xff, atomic_op))>, + !or(0xfe00, !and(0xff, atomic_op)), is64>, Requires<[HasAtomics]>; } @@ -32,85 +33,166 @@ multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "", //===----------------------------------------------------------------------===// let hasSideEffects = 1 in { -defm ATOMIC_NOTIFY : +defm ATOMIC_NOTIFY_A32 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", - "atomic.notify \t${off}${p2align}", 0x00>; + "atomic.notify \t${off}${p2align}", 
0x00, "false">; +defm ATOMIC_NOTIFY_A64 : + ATOMIC_I<(outs I32:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count", + "atomic.notify \t${off}${p2align}", 0x00, "true">; let mayLoad = 1 in { -defm ATOMIC_WAIT_I32 : +defm ATOMIC_WAIT_I32_A32 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i32.atomic.wait \t${off}${p2align}", 0x01>; -defm ATOMIC_WAIT_I64 : + "i32.atomic.wait \t${off}${p2align}", 0x01, "false">; +defm ATOMIC_WAIT_I32_A64 : + ATOMIC_I<(outs I32:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp, + I64:$timeout), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "i32.atomic.wait \t${off}${p2align}", 0x01, "true">; +defm ATOMIC_WAIT_I64_A32 : ATOMIC_I<(outs I32:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", - "i64.atomic.wait \t${off}${p2align}", 0x02>; + "i64.atomic.wait \t${off}${p2align}", 0x02, "false">; +defm ATOMIC_WAIT_I64_A64 : + ATOMIC_I<(outs I32:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp, + I64:$timeout), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout", + "i64.atomic.wait \t${off}${p2align}", 0x02, "true">; } // mayLoad = 1 } // hasSideEffects = 1 let Predicates = [HasAtomics] in { // Select notifys with no constant offset. -def NotifyPatNoOffset : +def NotifyPatNoOffset_A32 : Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)), - (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>; + (ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>, + Requires<[HasAddr32]>; +def NotifyPatNoOffset_A64 : + Pat<(i32 (int_wasm_atomic_notify I64:$addr, I32:$count)), + (ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>, + Requires<[HasAddr64]>; // Select notifys with a constant offset. // Pattern with address + immediate offset -class NotifyPatImmOff<PatFrag operand> : - Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), I32:$count)), - (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>; -def : NotifyPatImmOff<regPlusImm>; -def : NotifyPatImmOff<or_is_add>; +multiclass NotifyPatImmOff<PatFrag operand, string inst> { + def : Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), + I32:$count)), + (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, I32:$count)>, + Requires<[HasAddr32]>; + def : Pat<(i32 (int_wasm_atomic_notify (operand I64:$addr, imm:$off), + I32:$count)), + (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, I32:$count)>, + Requires<[HasAddr64]>; +} +defm : NotifyPatImmOff<regPlusImm, "ATOMIC_NOTIFY">; +defm : NotifyPatImmOff<or_is_add, "ATOMIC_NOTIFY">; // Select notifys with just a constant offset. 
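The _A32/_A64 suffixes running through these definitions exist because every memory instruction now comes in a 32-bit-address and a 64-bit-address flavor, and each pattern picks one via the HasAddr32/HasAddr64 predicates, mirroring the string concatenation in !cast<NI>(inst#_A32). A toy model of that name-based selection in plain C++ (selectVariant is an illustrative name):

#include <cassert>
#include <string>

// Each memory operation has two encodings; the subtarget's address width
// (wasm32 vs. wasm64) decides which one a pattern may select.
static std::string selectVariant(const std::string &Base, bool HasAddr64) {
  return Base + (HasAddr64 ? "_A64" : "_A32");
}

int main() {
  assert(selectVariant("ATOMIC_NOTIFY", false) == "ATOMIC_NOTIFY_A32");
  assert(selectVariant("ATOMIC_NOTIFY", true) == "ATOMIC_NOTIFY_A64");
}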
-def NotifyPatOffsetOnly : +def NotifyPatOffsetOnly_A32 : + Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), + (ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>, + Requires<[HasAddr32]>; +def NotifyPatOffsetOnly_A64 : Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), - (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>; + (ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>, + Requires<[HasAddr64]>; -def NotifyPatGlobalAddrOffOnly : +def NotifyPatGlobalAddrOffOnly_A32 : Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off), I32:$count)), - (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>; + (ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>, + Requires<[HasAddr32]>; +def NotifyPatGlobalAddrOffOnly_A64 : + Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off), + I32:$count)), + (ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)>, + Requires<[HasAddr64]>; // Select waits with no constant offset. -class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> : - Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)), - (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>; -def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>; -def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>; +multiclass WaitPatNoOffset<ValueType ty, Intrinsic kind, + string inst> { + def : Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, I64:$timeout)>, + Requires<[HasAddr32]>; + def : Pat<(i32 (kind I64:$addr, ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, I64:$timeout)>, + Requires<[HasAddr64]>; +} +defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; +defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; +defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; +defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; // Select waits with a constant offset. // Pattern with address + immediate offset -class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> : - Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)), - (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>; -def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>; -def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>; -def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>; -def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>; - -// Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset. 
-class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> : - Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), - (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>; -def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>; -def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>; - -class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> : - Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)), - (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>; -def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>; -def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>; +multiclass WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, + string inst> { + def : Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, + I64:$timeout)>, + Requires<[HasAddr32]>; + def : Pat<(i32 (kind (operand I64:$addr, imm:$off), ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, + I64:$timeout)>, + Requires<[HasAddr64]>; +} +defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, + "ATOMIC_WAIT_I32">; +defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, + "ATOMIC_WAIT_I32">; +defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, + "ATOMIC_WAIT_I64">; +defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, + "ATOMIC_WAIT_I64">; + +// Select waits with just a constant offset. +multiclass WaitPatOffsetOnly<ValueType ty, Intrinsic kind, string inst> { + def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp, + I64:$timeout)>, + Requires<[HasAddr32]>; + def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), + (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp, + I64:$timeout)>, + Requires<[HasAddr64]>; +} +defm : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">; +defm : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">; + +multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> { + def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, + I64:$timeout)), + (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, + I64:$timeout)>, + Requires<[HasAddr32]>; + def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, + I64:$timeout)), + (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp, + I64:$timeout)>, + Requires<[HasAddr64]>; +} +defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, + "ATOMIC_WAIT_I32">; +defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, + "ATOMIC_WAIT_I64">; } // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// @@ -131,8 +213,8 @@ defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence", //===----------------------------------------------------------------------===// multiclass AtomicLoad<WebAssemblyRegClass rc, string name, int atomic_op> { - defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op))>, - Requires<[HasAtomics]>; + defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op)), + [HasAtomics]>; } defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>; @@ -140,23 +222,23 @@ defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>; // Select loads with no constant offset.
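The !or(0xfe00, !and(0xff, atomic_op)) expression in AtomicLoad above reflects the threads proposal's encoding: atomic instructions sit behind a 0xFE prefix byte followed by a one-byte sub-opcode. The same computation in plain C++ (encodeAtomicOpcode is an illustrative name):

#include <cassert>
#include <cstdint>

// Atomic opcodes are encoded as the 0xFE prefix followed by a one-byte
// sub-opcode, e.g. i32.atomic.load is 0xFE 0x10.
static uint32_t encodeAtomicOpcode(uint32_t AtomicOp) {
  return 0xFE00u | (AtomicOp & 0xFFu);
}

int main() {
  assert(encodeAtomicOpcode(0x10) == 0xFE10); // i32.atomic.load
  assert(encodeAtomicOpcode(0x11) == 0xFE11); // i64.atomic.load
}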
 let Predicates = [HasAtomics] in {
-def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">;

 // Select loads with a constant offset.

 // Pattern with address + immediate offset
-def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>;
-def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>;
-def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>;
-def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
+defm : LoadPatImmOff<i32, atomic_load_32, regPlusImm, "ATOMIC_LOAD_I32">;
+defm : LoadPatImmOff<i64, atomic_load_64, regPlusImm, "ATOMIC_LOAD_I64">;
+defm : LoadPatImmOff<i32, atomic_load_32, or_is_add, "ATOMIC_LOAD_I32">;
+defm : LoadPatImmOff<i64, atomic_load_64, or_is_add, "ATOMIC_LOAD_I64">;

 // Select loads with just a constant offset.
-def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatOffsetOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;

-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;

 } // Predicates = [HasAtomics]
@@ -205,62 +287,62 @@ def sext_aload_16_64 :
 let Predicates = [HasAtomics] in {
 // Select zero-extending loads with no constant offset.
-def : LoadPatNoOffset<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
+defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;

 // Select sign-extending loads with no constant offset
-def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatNoOffset<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
 // 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s

 // Zero-extending loads with constant offset
-def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>;
+defm : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, "ATOMIC_LOAD32_U_I64">;

 // Sign-extending loads with constant offset
-def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatImmOff<i32, atomic_load_8, regPlusImm, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_16, regPlusImm, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_8, or_is_add, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_16, or_is_add, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
 // No 32->64 patterns, just use i32.atomic.load and i64.extend_i32_s

 // Extending loads with just a constant offset
-def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatOffsetOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
-def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatOffsetOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatOffsetOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+
+defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;

 } // Predicates = [HasAtomics]
@@ -269,8 +351,8 @@ def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
 //===----------------------------------------------------------------------===//

 multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
-  defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
-            Requires<[HasAtomics]>;
+  defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op)),
+                             [HasAtomics]>;
 }

 defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
@@ -284,33 +366,54 @@ defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
 let Predicates = [HasAtomics] in {

 // Select stores with no constant offset.
-class AStorePatNoOffset<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind I32:$addr, ty:$val), (inst 0, 0, I32:$addr, ty:$val)>;
-def : AStorePatNoOffset<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatNoOffset<i64, atomic_store_64, ATOMIC_STORE_I64>;
+multiclass AStorePatNoOffset<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(kind I32:$addr, ty:$val),
+            (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind I64:$addr, ty:$val),
+            (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
+        Requires<[HasAddr64]>;
+}
+defm : AStorePatNoOffset<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatNoOffset<i64, atomic_store_64, "ATOMIC_STORE_I64">;
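All of the pattern helpers in this commit lean on the same TableGen trick visible in AStorePatNoOffset above: instead of receiving an instruction record, a multiclass receives the instruction's name as a string, pastes on an address-width suffix, and resolves the result with !cast<NI>. Below is a minimal, hypothetical sketch of that dispatch; every name in it is made up (it is not the real NI class or the real instructions), but the file can be checked standalone with llvm-tblgen.

// Stand-in for the instruction base class used by the real patterns.
class NIStub<string mnemonic> { string Mnemonic = mnemonic; }
def ATOMIC_LOAD_I32_A32 : NIStub<"i32.atomic.load (addr32)">;
def ATOMIC_LOAD_I32_A64 : NIStub<"i32.atomic.load (addr64)">;
// The multiclass is handed "ATOMIC_LOAD_I32" as a plain string; pasting
// "_A32"/"_A64" and !cast-ing picks out the matching record, so one
// defm fans out into both address-width variants.
multiclass LoadPatStub<string inst> {
  def _addr32 : NIStub<!cast<NIStub>(inst # "_A32").Mnemonic>;
  def _addr64 : NIStub<!cast<NIStub>(inst # "_A64").Mnemonic>;
}
defm SELECTED : LoadPatStub<"ATOMIC_LOAD_I32">;

This string indirection is why the defm lines above now quote the instruction names ("ATOMIC_STORE_I32" and friends) instead of referencing the records directly.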
 // Select stores with a constant offset.

 // Pattern with address + immediate offset
-class AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
-  Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
-      (inst 0, imm:$off, I32:$addr, ty:$val)>;
-def : AStorePatImmOff<i32, atomic_store_32, regPlusImm, ATOMIC_STORE_I32>;
-def : AStorePatImmOff<i64, atomic_store_64, regPlusImm, ATOMIC_STORE_I64>;
-def : AStorePatImmOff<i32, atomic_store_32, or_is_add, ATOMIC_STORE_I32>;
-def : AStorePatImmOff<i64, atomic_store_64, or_is_add, ATOMIC_STORE_I64>;
+multiclass AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+                           string inst> {
+  def : Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
+            (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind (operand I64:$addr, imm:$off), ty:$val),
+            (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
+        Requires<[HasAddr64]>;
+}
+defm : AStorePatImmOff<i32, atomic_store_32, regPlusImm, "ATOMIC_STORE_I32">;
+defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">;

 // Select stores with just a constant offset.
-class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatOffsetOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatOffsetOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
-class AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
+multiclass AStorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(kind imm:$off, ty:$val),
+            (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind imm:$off, ty:$val),
+            (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
+        Requires<[HasAddr64]>;
+}
+defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
+
+multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+            (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+            (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
+        Requires<[HasAddr64]>;
+}
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;

 } // Predicates = [HasAtomics]
@@ -336,36 +439,40 @@ def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>;
 let Predicates = [HasAtomics] in {

 // Truncating stores with no constant offset
-def : AStorePatNoOffset<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatNoOffset<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatNoOffset<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatNoOffset<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatNoOffset<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
+defm : AStorePatNoOffset<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatNoOffset<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatNoOffset<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatNoOffset<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatNoOffset<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;

 // Truncating stores with a constant offset
-def : AStorePatImmOff<i32, atomic_store_8, regPlusImm, ATOMIC_STORE8_I32>;
-def : AStorePatImmOff<i32, atomic_store_16, regPlusImm, ATOMIC_STORE16_I32>;
-def : AStorePatImmOff<i64, trunc_astore_8_64, regPlusImm, ATOMIC_STORE8_I64>;
-def : AStorePatImmOff<i64, trunc_astore_16_64, regPlusImm, ATOMIC_STORE16_I64>;
-def : AStorePatImmOff<i64, trunc_astore_32_64, regPlusImm, ATOMIC_STORE32_I64>;
-def : AStorePatImmOff<i32, atomic_store_8, or_is_add, ATOMIC_STORE8_I32>;
-def : AStorePatImmOff<i32, atomic_store_16, or_is_add, ATOMIC_STORE16_I32>;
-def : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, ATOMIC_STORE8_I64>;
-def : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add, ATOMIC_STORE16_I64>;
-def : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add, ATOMIC_STORE32_I64>;
+defm : AStorePatImmOff<i32, atomic_store_8, regPlusImm, "ATOMIC_STORE8_I32">;
+defm : AStorePatImmOff<i32, atomic_store_16, regPlusImm, "ATOMIC_STORE16_I32">;
+defm : AStorePatImmOff<i64, trunc_astore_8_64, regPlusImm, "ATOMIC_STORE8_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_16_64, regPlusImm,
+                       "ATOMIC_STORE16_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_32_64, regPlusImm,
+                       "ATOMIC_STORE32_I64">;
+defm : AStorePatImmOff<i32, atomic_store_8, or_is_add, "ATOMIC_STORE8_I32">;
+defm : AStorePatImmOff<i32, atomic_store_16, or_is_add, "ATOMIC_STORE16_I32">;
+defm : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, "ATOMIC_STORE8_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add,
+                       "ATOMIC_STORE16_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add,
+                       "ATOMIC_STORE32_I64">;

 // Truncating stores with just a constant offset
-def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatOffsetOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatOffsetOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatOffsetOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
+defm : AStorePatOffsetOnly<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatOffsetOnly<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;
+
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;

 } // Predicates = [HasAtomics]
@@ -375,12 +482,18 @@ def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;

 multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name,
                              int atomic_op> {
-  defm "" :
+  defm "_A32" :
     ATOMIC_I<(outs rc:$dst),
              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
              !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
-             !strconcat(name, "\t${off}${p2align}"), atomic_op>;
+             !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+  defm "_A64" :
+    ATOMIC_I<(outs rc:$dst),
+             (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val),
+             (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+             !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
 }

 defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>;
@@ -464,56 +577,78 @@ defm ATOMIC_RMW32_U_XCHG_I64 :
   WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0x47>;

 // Select binary RMWs with no constant offset.
-class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind I32:$addr, ty:$val)), (inst 0, 0, I32:$addr, ty:$val)>;
+multiclass BinRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind I32:$addr, ty:$val)),
+            (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind I64:$addr, ty:$val)),
+            (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
+        Requires<[HasAddr64]>;
+}

 // Select binary RMWs with a constant offset.
 // Pattern with address + immediate offset
-class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
-  Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
-      (inst 0, imm:$off, I32:$addr, ty:$val)>;
+multiclass BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+                           string inst> {
+  def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
+            (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$val)),
+            (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
+        Requires<[HasAddr64]>;
+}

 // Select binary RMWs with just a constant offset.
-class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind imm:$off, ty:$val)),
-      (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
+multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind imm:$off, ty:$val)),
+            (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind imm:$off, ty:$val)),
+            (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
+        Requires<[HasAddr64]>;
+}

-class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+            (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+            (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
+        Requires<[HasAddr64]>;
+}
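The instruction side mirrors the pattern side: WebAssemblyBinRMW above now stamps out an _A32 and an _A64 twin per operation, differing only in the offset operand type (offset32_op vs. offset64_op), the address register class (I32 vs. I64), and the "false"/"true" wasm64 marker threaded to the instruction format. A stripped-down, hypothetical sketch of the twinning idiom follows; the records stand in for the real ATOMIC_I multiclass and can be checked with llvm-tblgen.

// Hypothetical stand-in for the instruction definition.
class RMWStub<string asm, int opc, string is64> {
  string Asm = asm;
  int Opcode = opc;
  string IsWasm64 = is64; // "false" = 32-bit addressing, "true" = 64-bit
}
// One defm produces NAME_A32 and NAME_A64 with a shared mnemonic and opcode.
multiclass BinRMWStub<string name, int atomic_op> {
  def _A32 : RMWStub<name, atomic_op, "false">;
  def _A64 : RMWStub<name, atomic_op, "true">;
}
defm ATOMIC_RMW_ADD_I32 : BinRMWStub<"i32.atomic.rmw.add", 0x1e>;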
 // Patterns for various addressing modes.
-multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
-                         NI inst_64> {
-  def : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
-  def : BinRMWPatNoOffset<i64, rmw_64, inst_64>;
+multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
+                         string inst_64> {
+  defm : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
+  defm : BinRMWPatNoOffset<i64, rmw_64, inst_64>;

-  def : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
-  def : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
-  def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
-  def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+  defm : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+  defm : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+  defm : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+  defm : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;

-  def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
-  def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+  defm : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+  defm : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;

-  def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+  defm : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
 }

 let Predicates = [HasAtomics] in {
-defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64, ATOMIC_RMW_ADD_I32,
-                     ATOMIC_RMW_ADD_I64>;
-defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64, ATOMIC_RMW_SUB_I32,
-                     ATOMIC_RMW_SUB_I64>;
-defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64, ATOMIC_RMW_AND_I32,
-                     ATOMIC_RMW_AND_I64>;
-defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64, ATOMIC_RMW_OR_I32,
-                     ATOMIC_RMW_OR_I64>;
-defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64, ATOMIC_RMW_XOR_I32,
-                     ATOMIC_RMW_XOR_I64>;
-defm : BinRMWPattern<atomic_swap_32, atomic_swap_64, ATOMIC_RMW_XCHG_I32,
-                     ATOMIC_RMW_XCHG_I64>;
+defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64,
+                     "ATOMIC_RMW_ADD_I32", "ATOMIC_RMW_ADD_I64">;
+defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64,
+                     "ATOMIC_RMW_SUB_I32", "ATOMIC_RMW_SUB_I64">;
+defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64,
+                     "ATOMIC_RMW_AND_I32", "ATOMIC_RMW_AND_I64">;
+defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64,
+                     "ATOMIC_RMW_OR_I32", "ATOMIC_RMW_OR_I64">;
+defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64,
+                     "ATOMIC_RMW_XOR_I32", "ATOMIC_RMW_XOR_I64">;
+defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
+                     "ATOMIC_RMW_XCHG_I32", "ATOMIC_RMW_XCHG_I64">;
 } // Predicates = [HasAtomics]
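BinRMWPattern works because `defm :` with no name is legal TableGen: an anonymous defm instantiates a multiclass purely for the records it emits, letting one pattern multiclass fan out through several helper multiclasses. A tiny hypothetical sketch of that composition (made-up classes, checkable with llvm-tblgen):

class PatStub<string inst, string mode> {
  string Inst = inst;
  string Mode = mode;
}
multiclass OneMode<string inst, string mode> {
  def : PatStub<inst, mode>;        // anonymous record
}
multiclass AllModes<string inst> {
  defm : OneMode<inst, "no-offset">;
  defm : OneMode<inst, "reg-plus-imm">;
  defm : OneMode<inst, "offset-only">;
  defm : OneMode<inst, "global-addr">;
}
defm : AllModes<"ATOMIC_RMW_ADD_I32">; // emits four anonymous pattern records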
 // Truncating & zero-extending binary RMW patterns.
@@ -556,87 +691,93 @@ multiclass BinRMWTruncExtPattern<
   PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
-  NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+  string inst8_32, string inst16_32, string inst8_64, string inst16_64,
+  string inst32_64> {
   // Truncating-extending binary RMWs with no constant offset
-  def : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
-  def : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+  defm : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  defm : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;

-  def : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  defm : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;

   // Truncating-extending binary RMWs with a constant offset
-  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
-  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
-  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
-  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
-  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
-
-  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
-  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
-  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
-  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
-  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
-  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
-  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
-  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  defm : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  defm : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm,
+                         inst16_32>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm,
+                         inst16_64>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm,
+                         inst32_64>;
+  defm : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  defm : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  defm : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+  defm : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  defm : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm,
+                         inst16_32>;
+  defm : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  defm : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm,
+                         inst16_64>;
+  defm : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  defm : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  defm : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  defm : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;

   // Truncating-extending binary RMWs with just a constant offset
-  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
-  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
-  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  defm : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  defm : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  defm : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  defm : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  defm : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
 }

 let Predicates = [HasAtomics] in {
 defm : BinRMWTruncExtPattern<
   atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
-  ATOMIC_RMW8_U_ADD_I32, ATOMIC_RMW16_U_ADD_I32,
-  ATOMIC_RMW8_U_ADD_I64, ATOMIC_RMW16_U_ADD_I64, ATOMIC_RMW32_U_ADD_I64>;
+  "ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32",
+  "ATOMIC_RMW8_U_ADD_I64", "ATOMIC_RMW16_U_ADD_I64", "ATOMIC_RMW32_U_ADD_I64">;
 defm : BinRMWTruncExtPattern<
   atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64,
-  ATOMIC_RMW8_U_SUB_I32, ATOMIC_RMW16_U_SUB_I32,
-  ATOMIC_RMW8_U_SUB_I64, ATOMIC_RMW16_U_SUB_I64, ATOMIC_RMW32_U_SUB_I64>;
+  "ATOMIC_RMW8_U_SUB_I32", "ATOMIC_RMW16_U_SUB_I32",
+  "ATOMIC_RMW8_U_SUB_I64", "ATOMIC_RMW16_U_SUB_I64", "ATOMIC_RMW32_U_SUB_I64">;
 defm : BinRMWTruncExtPattern<
   atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64,
-  ATOMIC_RMW8_U_AND_I32, ATOMIC_RMW16_U_AND_I32,
-  ATOMIC_RMW8_U_AND_I64, ATOMIC_RMW16_U_AND_I64, ATOMIC_RMW32_U_AND_I64>;
+  "ATOMIC_RMW8_U_AND_I32", "ATOMIC_RMW16_U_AND_I32",
+  "ATOMIC_RMW8_U_AND_I64", "ATOMIC_RMW16_U_AND_I64", "ATOMIC_RMW32_U_AND_I64">;
 defm : BinRMWTruncExtPattern<
   atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64,
-  ATOMIC_RMW8_U_OR_I32, ATOMIC_RMW16_U_OR_I32,
-  ATOMIC_RMW8_U_OR_I64, ATOMIC_RMW16_U_OR_I64, ATOMIC_RMW32_U_OR_I64>;
+  "ATOMIC_RMW8_U_OR_I32", "ATOMIC_RMW16_U_OR_I32",
+  "ATOMIC_RMW8_U_OR_I64", "ATOMIC_RMW16_U_OR_I64", "ATOMIC_RMW32_U_OR_I64">;
 defm : BinRMWTruncExtPattern<
   atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64,
-  ATOMIC_RMW8_U_XOR_I32, ATOMIC_RMW16_U_XOR_I32,
-  ATOMIC_RMW8_U_XOR_I64, ATOMIC_RMW16_U_XOR_I64, ATOMIC_RMW32_U_XOR_I64>;
+  "ATOMIC_RMW8_U_XOR_I32", "ATOMIC_RMW16_U_XOR_I32",
+  "ATOMIC_RMW8_U_XOR_I64", "ATOMIC_RMW16_U_XOR_I64", "ATOMIC_RMW32_U_XOR_I64">;
 defm : BinRMWTruncExtPattern<
   atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64,
-  ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
-  ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
+  "ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32",
+  "ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64",
+  "ATOMIC_RMW32_U_XCHG_I64">;
 } // Predicates = [HasAtomics]

 //===----------------------------------------------------------------------===//
@@ -651,13 +792,20 @@ defm : BinRMWTruncExtPattern<

 multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
                              int atomic_op> {
-  defm "" :
+  defm "_A32" :
     ATOMIC_I<(outs rc:$dst),
              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
                   rc:$new_),
              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
              !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
-             !strconcat(name, "\t${off}${p2align}"), atomic_op>;
+             !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+  defm "_A64" :
+    ATOMIC_I<(outs rc:$dst),
+             (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$exp,
+                  rc:$new_),
+             (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
+             !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
 }

 defm ATOMIC_RMW_CMPXCHG_I32 :
@@ -676,47 +824,70 @@ defm ATOMIC_RMW32_U_CMPXCHG_I64 :
   WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0x4e>;

 // Select ternary RMWs with no constant offset.
-class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
-      (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>;
+multiclass TerRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, ty:$new)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind I64:$addr, ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, ty:$new)>,
+        Requires<[HasAddr64]>;
+}

 // Select ternary RMWs with a constant offset.
 // Pattern with address + immediate offset
-class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
-  Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
-      (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
+multiclass TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+                           string inst> {
+  def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, ty:$new)>,
+        Requires<[HasAddr64]>;
+}

 // Select ternary RMWs with just a constant offset.
-class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
-      (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+multiclass TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp,
+             ty:$new)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp,
+             ty:$new)>,
+        Requires<[HasAddr64]>;
+}

-class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+  def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
+             ty:$new)>,
+        Requires<[HasAddr32]>;
+  def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+            (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
+             ty:$new)>,
+        Requires<[HasAddr64]>;
+}
 // Patterns for various addressing modes.
-multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
-                         NI inst_64> {
-  def : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
-  def : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
+multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
+                         string inst_64> {
+  defm : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
+  defm : TerRMWPatNoOffset<i64, rmw_64, inst_64>;

-  def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
-  def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
-  def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
-  def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+  defm : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+  defm : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+  defm : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+  defm : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;

-  def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
-  def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+  defm : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+  defm : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;

-  def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+  defm : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
 }

 let Predicates = [HasAtomics] in
 defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
-                     ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
+                     "ATOMIC_RMW_CMPXCHG_I32", "ATOMIC_RMW_CMPXCHG_I64">;

 // Truncating & zero-extending ternary RMW patterns.
 // DAG legalization & optimization before instruction selection may introduce
@@ -759,67 +930,73 @@ class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
 // Patterns for various addressing modes for truncating-extending ternary RMWs.
 multiclass TerRMWTruncExtPattern<
   PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
-  NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+  string inst8_32, string inst16_32, string inst8_64, string inst16_64,
+  string inst32_64> {
   // Truncating-extending ternary RMWs with no constant offset
-  def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
-  def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+  defm : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+  defm : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;

-  def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+  defm : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;

   // Truncating-extending ternary RMWs with a constant offset
-  def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
-  def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
-  def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
-  def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
-  def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
-
-  def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
-  def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
-  def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
-  def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
-  def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
-  def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
-  def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
-  def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  defm : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  defm : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm,
+                         inst16_32>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm,
+                         inst16_64>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm,
+                         inst32_64>;
+  defm : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  defm : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  defm : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+  defm : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  defm : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm,
+                         inst16_32>;
+  defm : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  defm : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm,
+                         inst16_64>;
+  defm : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  defm : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  defm : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  defm : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;

   // Truncating-extending ternary RMWs with just a constant offset
-  def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
-  def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
-  def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+  defm : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+  defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+  defm : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+  defm : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+  defm : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+  defm : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+  defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
 }

 let Predicates = [HasAtomics] in
 defm : TerRMWTruncExtPattern<
   atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
-  ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
-  ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
-  ATOMIC_RMW32_U_CMPXCHG_I64>;
+  "ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32",
+  "ATOMIC_RMW8_U_CMPXCHG_I64", "ATOMIC_RMW16_U_CMPXCHG_I64",
+  "ATOMIC_RMW32_U_CMPXCHG_I64">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
index 05735cf6d31f7..3e9ef6fbc7eaa 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -33,39 +33,43 @@ def wasm_memset_t : SDTypeProfile<0, 4,
 def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t,
                          [SDNPHasChain, SDNPMayStore]>;

+multiclass BulkMemoryOps<WebAssemblyRegClass rc, string B> {
+
 let mayStore = 1, hasSideEffects = 1 in
-defm MEMORY_INIT :
+defm MEMORY_INIT_A#B :
   BULK_I<(outs),
-         (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
-              I32:$offset, I32:$size),
+         (ins i32imm_op:$seg, i32imm_op:$idx, rc:$dest,
+              rc:$offset, rc:$size),
          (outs), (ins i32imm_op:$seg, i32imm_op:$idx),
-         [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
-            I32:$offset, I32:$size
-         )],
+         [],
          "memory.init\t$seg, $idx, $dest, $offset, $size",
         "memory.init\t$seg, $idx", 0x08>;

 let hasSideEffects = 1 in
 defm DATA_DROP :
   BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
-         [(int_wasm_data_drop (i32 timm:$seg))],
+         [],
          "data.drop\t$seg", "data.drop\t$seg", 0x09>;

 let mayLoad = 1, mayStore = 1 in
-defm MEMORY_COPY :
+defm MEMORY_COPY_A#B :
   BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx,
-                      I32:$dst, I32:$src, I32:$len),
+                      rc:$dst, rc:$src, rc:$len),
          (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx),
          [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx),
-                       I32:$dst, I32:$src, I32:$len
+                       rc:$dst, rc:$src, rc:$len
          )],
          "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len",
          "memory.copy\t$src_idx, $dst_idx", 0x0a>;

 let mayStore = 1 in
-defm MEMORY_FILL :
-  BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size),
+defm MEMORY_FILL_A#B :
+  BULK_I<(outs), (ins i32imm_op:$idx, rc:$dst, I32:$value, rc:$size),
          (outs), (ins i32imm_op:$idx),
-         [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)],
+         [(wasm_memset (i32 imm:$idx), rc:$dst, I32:$value, rc:$size)],
          "memory.fill\t$idx, $dst, $value, $size",
          "memory.fill\t$idx", 0x0b>;
+
+}
+
+defm : BulkMemoryOps<I32, "32">;
+defm : BulkMemoryOps<I64, "64">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 703c15d58c93a..b997c1c16fcb4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -23,155 +23,56 @@ defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
                           [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
 } // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1

-multiclass CALL<ValueType vt, WebAssemblyRegClass rt, string prefix,
-                list<Predicate> preds = []> {
-  defm CALL_#vt :
-    I<(outs rt:$dst), (ins function32_op:$callee, variable_ops),
-      (outs), (ins function32_op:$callee),
-      [(set (vt rt:$dst), (WebAssemblycall1 (i32 imm:$callee)))],
-      !strconcat(prefix, "call\t$dst, $callee"),
-      !strconcat(prefix, "call\t$callee"),
-      0x10>,
-    Requires<preds>;

-  let isCodeGenOnly = 1 in
-  defm PCALL_INDIRECT_#vt :
-    I<(outs rt:$dst), (ins I32:$callee, variable_ops),
-      (outs), (ins I32:$callee),
-      [(set (vt rt:$dst), (WebAssemblycall1 I32:$callee))],
-      "PSEUDO CALL INDIRECT\t$callee",
-      "PSEUDO CALL INDIRECT\t$callee">,
-    Requires<preds>;
+let Uses = [SP32, SP64], isCall = 1 in {

-  defm CALL_INDIRECT_#vt :
-    I<(outs rt:$dst),
-      (ins TypeIndex:$type, i32imm:$flags, variable_ops),
-      (outs), (ins TypeIndex:$type, i32imm:$flags),
-      [],
-      !strconcat(prefix, "call_indirect\t$dst"),
-      !strconcat(prefix, "call_indirect\t$type"),
-      0x11>,
-    Requires<preds>;
-}
+// CALL should take both variadic arguments and produce variadic results, but
+// this is not possible to model directly. Instead, we select calls to a
+// CALL_PARAMS taking variadic arguments linked with a CALL_RESULTS that handles
+// producing the call's variadic results. We recombine the two in a custom
+// inserter hook after DAG ISel, so passes over MachineInstrs will only ever
+// observe CALL nodes with all of the expected variadic uses and defs.
+let isPseudo = 1 in
+defm CALL_PARAMS :
+  I<(outs), (ins function32_op:$callee, variable_ops),
+    (outs), (ins function32_op:$callee), [],
+    "call_params\t$callee", "call_params\t$callee", -1>;

-let Uses = [SP32, SP64], isCall = 1 in {
-defm "" : CALL<i32, I32, "i32.">;
-defm "" : CALL<i64, I64, "i64.">;
-defm "" : CALL<f32, F32, "f32.">;
-defm "" : CALL<f64, F64, "f64.">;
-defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>;
-defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v2i64, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v4f32, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v2f64, V128, "v128.", [HasSIMD128]>;
+let variadicOpsAreDefs = 1, usesCustomInserter = 1, isPseudo = 1 in
+defm CALL_RESULTS :
+  I<(outs), (ins variable_ops), (outs), (ins), [],
+    "call_results", "call_results", -1>;

-let IsCanonical = 1 in {
-defm CALL_VOID :
+let variadicOpsAreDefs = 1, usesCustomInserter = 1, isPseudo = 1 in
+defm RET_CALL_RESULTS :
+  I<(outs), (ins variable_ops), (outs), (ins), [],
+    "return_call_results", "return_call_results", -1>;
+
+let variadicOpsAreDefs = 1 in
+defm CALL :
   I<(outs), (ins function32_op:$callee, variable_ops),
-    (outs), (ins function32_op:$callee),
-    [(WebAssemblycall0 (i32 imm:$callee))],
-    "call \t$callee", "call\t$callee", 0x10>;
+    (outs), (ins function32_op:$callee), [],
+    "call", "call\t$callee", 0x10>;

-let isReturn = 1 in
+let variadicOpsAreDefs = 1 in
+defm CALL_INDIRECT :
+  I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+    (outs), (ins TypeIndex:$type, i32imm:$flags), [],
+    "call_indirect", "call_indirect\t$type", 0x11>;
+
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in
 defm RET_CALL :
   I<(outs), (ins function32_op:$callee, variable_ops),
-    (outs), (ins function32_op:$callee),
-    [(WebAssemblyretcall (i32 imm:$callee))],
+    (outs), (ins function32_op:$callee), [],
     "return_call \t$callee", "return_call\t$callee", 0x12>,
   Requires<[HasTailCall]>;
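The comment block above is the core of the call redesign: instruction selection first produces a pseudo that owns the variadic argument list and a partner pseudo that owns the variadic result list, and a C++ custom-inserter hook stitches them back into one CALL machine instruction after DAG ISel. A hypothetical sketch of just the two flags that drive this, using stand-in records rather than the real I<> multiclass:

// VariadicOpsAreDefs marks the record whose variadic operands are results;
// UsesCustomInserter requests the C++ hook that merges the pair after ISel.
class CallPieceStub<bit variadicDefs, bit customInserter> {
  bit VariadicOpsAreDefs = variadicDefs;
  bit UsesCustomInserter = customInserter;
}
def CALL_PARAMS_STUB  : CallPieceStub<0, 0>; // carries the argument list
def CALL_RESULTS_STUB : CallPieceStub<1, 1>; // carries the result list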
-let isCodeGenOnly = 1 in
-defm PCALL_INDIRECT_VOID :
-  I<(outs), (ins I32:$callee, variable_ops),
-    (outs), (ins I32:$callee),
-    [(WebAssemblycall0 I32:$callee)],
-    "PSEUDO CALL INDIRECT\t$callee",
-    "PSEUDO CALL INDIRECT\t$callee">;
-
-defm CALL_INDIRECT_VOID :
-  I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
-    (outs), (ins TypeIndex:$type, i32imm:$flags),
-    [],
-    "call_indirect\t", "call_indirect\t$type",
-    0x11>;
-
 let isReturn = 1 in
 defm RET_CALL_INDIRECT :
   I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
-    (outs), (ins TypeIndex:$type, i32imm:$flags),
-    [],
+    (outs), (ins TypeIndex:$type, i32imm:$flags), [],
     "return_call_indirect\t", "return_call_indirect\t$type",
     0x13>,
   Requires<[HasTailCall]>;

-let isCodeGenOnly = 1, isReturn = 1 in
-defm PRET_CALL_INDIRECT:
-  I<(outs), (ins I32:$callee, variable_ops),
-    (outs), (ins I32:$callee),
-    [(WebAssemblyretcall I32:$callee)],
-    "PSEUDO RET_CALL INDIRECT\t$callee",
-    "PSEUDO RET_CALL INDIRECT\t$callee">,
-  Requires<[HasTailCall]>;
-
-} // IsCanonical = 1
 } // Uses = [SP32,SP64], isCall = 1
-
-// Patterns for matching a direct call to a global address.
-def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_i32 tglobaladdr:$callee)>;
-def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_i64 tglobaladdr:$callee)>;
-def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_f32 tglobaladdr:$callee)>;
-def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_f64 tglobaladdr:$callee)>;
-def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_exnref tglobaladdr:$callee)>,
-      Requires<[HasExceptionHandling]>;
-def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
-          (CALL_VOID tglobaladdr:$callee)>;
-def : Pat<(WebAssemblyretcall (WebAssemblywrapper tglobaladdr:$callee)),
-          (RET_CALL tglobaladdr:$callee)>, Requires<[HasTailCall]>;
-
-// Patterns for matching a direct call to an external symbol.
-def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_i32 texternalsym:$callee)>;
-def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_i64 texternalsym:$callee)>;
-def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_f32 texternalsym:$callee)>;
-def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_f64 texternalsym:$callee)>;
-def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_exnref texternalsym:$callee)>,
-      Requires<[HasExceptionHandling]>;
-def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
-          (CALL_VOID texternalsym:$callee)>;
-def : Pat<(WebAssemblyretcall (WebAssemblywrapper texternalsym:$callee)),
-          (RET_CALL texternalsym:$callee)>, Requires<[HasTailCall]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 1afc9a8790dcd..171dd9a67beb5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -40,21 +40,25 @@ def brlist : Operand<i32> {
   let PrintMethod = "printBrList";
 }

-// TODO: SelectionDAG's lowering insists on using a pointer as the index for
-// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
-// currently.
-let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+// Duplicating a BR_TABLE is almost never a good idea. In particular, it can
+// lead to some nasty irreducibility due to tail merging when the br_table is in
+// a loop.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1 in {
+
 defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
                       (outs), (ins brlist:$brl),
                       [(WebAssemblybr_table I32:$index)],
                       "br_table \t$index", "br_table \t$brl",
                       0x0e>;
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
+// currently.
 defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
                       (outs), (ins brlist:$brl),
                       [(WebAssemblybr_table I64:$index)],
                       "br_table \t$index", "br_table \t$brl",
                       0x0e>;
-} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1

 // This is technically a control-flow instruction, since all it affects is the
 // IP.
@@ -85,8 +89,8 @@ defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
 } // Uses = [VALUE_STACK], Defs = [VALUE_STACK]

-let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-
+let hasCtrlDep = 1, isBarrier = 1 in {
+let isTerminator = 1 in {
 let isReturn = 1 in {

 defm RETURN : I<(outs), (ins variable_ops), (outs), (ins),
@@ -99,8 +103,21 @@ defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;

 } // isReturn = 1

+let isTrap = 1 in
 defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
-} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // isTerminator = 1
+
+// debugtrap explicitly returns despite trapping because it is supposed to just
+// get the attention of the debugger. Unfortunately, because UNREACHABLE is a
+// terminator, lowering debugtrap to UNREACHABLE can create an invalid
+// MachineBasicBlock when there is additional code after it. Lower it to this
+// non-terminator version instead.
+// TODO: Actually execute the debugger statement when running on the Web
+let isTrap = 1 in
+defm DEBUG_UNREACHABLE : NRI<(outs), (ins), [(debugtrap)], "unreachable", 0x00>;
+
+} // hasCtrlDep = 1, isBarrier = 1

 //===----------------------------------------------------------------------===//
 // Exception handling instructions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index aff4d20d8d823..0a4289c4959ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -14,11 +14,13 @@
 // WebAssembly Instruction Format.
 // We instantiate 2 of these for every actual instruction (register based
 // and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
-                      Instruction {
+class WebAssemblyInst<bits<32> inst, string asmstr, string stack, string is64>
+    : StackRel, Wasm64Rel, Instruction {
   bits<32> Inst = inst; // Instruction encoding.
   string StackBased = stack;
   string BaseName = NAME;
+  string IsWasm64 = is64;
+  string Wasm32Name = !subst("_A64", "_A32", NAME);
   let Namespace = "WebAssembly";
   let Pattern = [];
   let AsmString = asmstr;
@@ -29,8 +31,8 @@ class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,

 // Normal instructions. Default instantiation of a WebAssemblyInst.
 class NI<dag oops, dag iops, list<dag> pattern, string stack,
-         string asmstr = "", bits<32> inst = -1>
-    : WebAssemblyInst<inst, asmstr, stack> {
+         string asmstr = "", bits<32> inst = -1, string is64 = "false">
+    : WebAssemblyInst<inst, asmstr, stack, is64> {
   dag OutOperandList = oops;
   dag InOperandList = iops;
   let Pattern = pattern;
@@ -52,11 +54,11 @@ class NI<dag oops, dag iops, list<dag> pattern, string stack,
 // there is always an equivalent pair of instructions.
 multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
              list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
-             bits<32> inst = -1> {
+             bits<32> inst = -1, string is64 = "false"> {
   let isCodeGenOnly = 1 in
-  def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+  def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst, is64>;
   let BaseName = NAME in
-  def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
+  def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst, is64>;
 }

 // For instructions that have no register ops, so both sets are the same.
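The Wasm32Name field introduced above is the hinge of the 32-to-64-bit opcode mapping: every instruction derives the name of its _A32 twin from its own name with !subst, so an _A64 record and its _A32 counterpart compute the same key, which the getWasm64Opcode InstrMapping in WebAssemblyInstrInfo.td below groups rows by. A hypothetical, self-contained illustration of the naming trick (made-up records, checkable with llvm-tblgen):

class MapKeyStub {
  // For FOO_A64 this is "FOO_A32"; for FOO_A32 it is the name unchanged.
  string Wasm32Name = !subst("_A64", "_A32", NAME);
  string IsWasm64 = !if(!ne(NAME, !subst("_A64", "_A32", NAME)), "true", "false");
}
def LOAD_X_A32 : MapKeyStub; // Wasm32Name = "LOAD_X_A32", IsWasm64 = "false"
def LOAD_X_A64 : MapKeyStub; // Wasm32Name = "LOAD_X_A32", IsWasm64 = "true"
// Two records, one shared row key: exactly the shape an InstrMapping keyed on
// Wasm32Name and columned on IsWasm64 needs in order to pair them.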
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 221dacaf821bf..6fe1fd2b5c5a9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -235,8 +235,9 @@ bool WebAssemblyInstrInfo::reverseBranchCondition(
 ArrayRef<std::pair<int, const char *>>
 WebAssemblyInstrInfo::getSerializableTargetIndices() const {
   static const std::pair<int, const char *> TargetIndices[] = {
-      {WebAssembly::TI_LOCAL_START, "wasm-local-start"},
-      {WebAssembly::TI_GLOBAL_START, "wasm-global-start"},
-      {WebAssembly::TI_OPERAND_STACK_START, "wasm-operator-stack-start"}};
+      {WebAssembly::TI_LOCAL, "wasm-local"},
+      {WebAssembly::TI_GLOBAL_FIXED, "wasm-global-fixed"},
+      {WebAssembly::TI_OPERAND_STACK, "wasm-operand-stack"},
+      {WebAssembly::TI_GLOBAL_RELOC, "wasm-global-reloc"}};
   return makeArrayRef(TargetIndices);
 }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 0449014813812..5ff0d73534a6a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -24,43 +24,47 @@ def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;

 def HasSIMD128 :
     Predicate<"Subtarget->hasSIMD128()">,
-    AssemblerPredicate<"FeatureSIMD128", "simd128">;
+    AssemblerPredicate<(all_of FeatureSIMD128), "simd128">;

 def HasUnimplementedSIMD128 :
     Predicate<"Subtarget->hasUnimplementedSIMD128()">,
-    AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
+    AssemblerPredicate<(all_of FeatureUnimplementedSIMD128), "unimplemented-simd128">;

 def HasAtomics :
     Predicate<"Subtarget->hasAtomics()">,
-    AssemblerPredicate<"FeatureAtomics", "atomics">;
+    AssemblerPredicate<(all_of FeatureAtomics), "atomics">;

 def HasMultivalue :
     Predicate<"Subtarget->hasMultivalue()">,
-    AssemblerPredicate<"FeatureMultivalue", "multivalue">;
+    AssemblerPredicate<(all_of FeatureMultivalue), "multivalue">;

 def HasNontrappingFPToInt :
     Predicate<"Subtarget->hasNontrappingFPToInt()">,
-    AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+    AssemblerPredicate<(all_of FeatureNontrappingFPToInt), "nontrapping-fptoint">;

 def NotHasNontrappingFPToInt :
     Predicate<"!Subtarget->hasNontrappingFPToInt()">,
-    AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+    AssemblerPredicate<(all_of (not FeatureNontrappingFPToInt)), "nontrapping-fptoint">;

 def HasSignExt :
     Predicate<"Subtarget->hasSignExt()">,
-    AssemblerPredicate<"FeatureSignExt", "sign-ext">;
+    AssemblerPredicate<(all_of FeatureSignExt), "sign-ext">;

 def HasTailCall :
     Predicate<"Subtarget->hasTailCall()">,
-    AssemblerPredicate<"FeatureTailCall", "tail-call">;
+    AssemblerPredicate<(all_of FeatureTailCall), "tail-call">;

 def HasExceptionHandling :
     Predicate<"Subtarget->hasExceptionHandling()">,
-    AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">;
+    AssemblerPredicate<(all_of FeatureExceptionHandling), "exception-handling">;

 def HasBulkMemory :
     Predicate<"Subtarget->hasBulkMemory()">,
-    AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">;
+    AssemblerPredicate<(all_of FeatureBulkMemory), "bulk-memory">;
+
+def HasReferenceTypes :
+    Predicate<"Subtarget->hasReferenceTypes()">,
+    AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">;

 //===----------------------------------------------------------------------===//
 // WebAssembly-specific DAG Node Types.
@@ -91,15 +95,6 @@ def WebAssemblycallseq_start : def WebAssemblycallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0", - SDT_WebAssemblyCall0, - [SDNPHasChain, SDNPVariadic]>; -def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1", - SDT_WebAssemblyCall1, - [SDNPHasChain, SDNPVariadic]>; -def WebAssemblyretcall : SDNode<"WebAssemblyISD::RET_CALL", - SDT_WebAssemblyCall0, - [SDNPHasChain, SDNPVariadic]>; def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", SDT_WebAssemblyBrTable, [SDNPHasChain, SDNPVariadic]>; @@ -171,6 +166,9 @@ def function32_op : Operand<i32>; let OperandType = "OPERAND_OFFSET32" in def offset32_op : Operand<i32>; +let OperandType = "OPERAND_OFFSET64" in +def offset64_op : Operand<i64>; + let OperandType = "OPERAND_P2ALIGN" in { def P2Align : Operand<i32> { let PrintMethod = "printWebAssemblyP2AlignOperand"; @@ -205,6 +203,19 @@ def getStackOpcode : InstrMapping { } //===----------------------------------------------------------------------===// +// WebAssembly 32 to 64-bit instruction mapping +//===----------------------------------------------------------------------===// + +class Wasm64Rel; +def getWasm64Opcode : InstrMapping { + let FilterClass = "Wasm64Rel"; + let RowFields = ["Wasm32Name"]; + let ColFields = ["IsWasm64"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index eba9b80d32861..b3c63cc1f884b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// // TODO: -// - HasAddr64 // - WebAssemblyTargetLowering having to do with atomics // - Each has optional alignment. @@ -41,181 +40,222 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ // offsets folded into them, so we can just use add. // Defines atomic and non-atomic loads, regular and extending. -multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> { - let mayLoad = 1, UseNamedOperandTable = 1 in - defm "": I<(outs rc:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - (outs), (ins P2Align:$p2align, offset32_op:$off), - [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), - !strconcat(Name, "\t${off}${p2align}"), Opcode>; +multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode, + list<Predicate> reqs = []> { + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm "_A32": I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), + [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), + !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">, + Requires<reqs>; + defm "_A64": I<(outs rc:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), + [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), + !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">, + Requires<reqs>; + } } // Basic load. 
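The WebAssemblyLoad multiclass above is the first user of the new scheme: every load now comes in an _A32/_A64 pair, and the getWasm64Opcode InstrMapping ties each pair together through the Wasm32Name row key that WebAssemblyInstrFormats.td computes with !subst("_A64", "_A32", NAME). A hypothetical stand-alone model of the generated lookup follows; the table and helper name are toys for illustration, not the TableGen output.

#include <map>
#include <string>

// Toy model of the getWasm64Opcode InstrMapping: both variants of a load
// share a row key (their name with "_A64" rewritten to "_A32"), and the
// IsWasm64 column selects within the row.
static const std::map<std::string, std::string> Wasm64Rows = {
    {"LOAD_I32_A32", "LOAD_I32_A64"},
    {"LOAD_F64_A32", "LOAD_F64_A64"},
};

// Analogous to getWasm64Opcode returning -1 when no 64-bit twin exists.
std::string getWasm64Twin(const std::string &Wasm32Name) {
  auto It = Wasm64Rows.find(Wasm32Name);
  return It == Wasm64Rows.end() ? std::string() : It->second;
}

The basic i32/i64/f32/f64 loads defined next are instantiated through exactly this multiclass, so each picks up both addressing variants.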
// FIXME: When we can break syntax compatibility, reorder the fields in the // asmstrings to match the binary encoding. -defm LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>; -defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>; -defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>; -defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>; +defm LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28, []>; +defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29, []>; +defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>; +defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>; // Select loads with no constant offset. -class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> : - Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>; - -def : LoadPatNoOffset<i32, load, LOAD_I32>; -def : LoadPatNoOffset<i64, load, LOAD_I64>; -def : LoadPatNoOffset<f32, load, LOAD_F32>; -def : LoadPatNoOffset<f64, load, LOAD_F64>; +multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> { + def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>, + Requires<[HasAddr32]>; + def : Pat<(ty (kind I64:$addr)), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>, + Requires<[HasAddr64]>; +} +defm : LoadPatNoOffset<i32, load, "LOAD_I32">; +defm : LoadPatNoOffset<i64, load, "LOAD_I64">; +defm : LoadPatNoOffset<f32, load, "LOAD_F32">; +defm : LoadPatNoOffset<f64, load, "LOAD_F64">; // Select loads with a constant offset. // Pattern with address + immediate offset -class LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : - Pat<(ty (kind (operand I32:$addr, imm:$off))), (inst 0, imm:$off, I32:$addr)>; - -def : LoadPatImmOff<i32, load, regPlusImm, LOAD_I32>; -def : LoadPatImmOff<i64, load, regPlusImm, LOAD_I64>; -def : LoadPatImmOff<f32, load, regPlusImm, LOAD_F32>; -def : LoadPatImmOff<f64, load, regPlusImm, LOAD_F64>; -def : LoadPatImmOff<i32, load, or_is_add, LOAD_I32>; -def : LoadPatImmOff<i64, load, or_is_add, LOAD_I64>; -def : LoadPatImmOff<f32, load, or_is_add, LOAD_F32>; -def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>; +multiclass LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, + string inst> { + def : Pat<(ty (kind (operand I32:$addr, imm:$off))), + (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr)>, + Requires<[HasAddr32]>; + def : Pat<(ty (kind (operand I64:$addr, imm:$off))), + (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr)>, + Requires<[HasAddr64]>; +} -// Select loads with just a constant offset. -class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : - Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; +defm : LoadPatImmOff<i32, load, regPlusImm, "LOAD_I32">; +defm : LoadPatImmOff<i64, load, regPlusImm, "LOAD_I64">; +defm : LoadPatImmOff<f32, load, regPlusImm, "LOAD_F32">; +defm : LoadPatImmOff<f64, load, regPlusImm, "LOAD_F64">; +defm : LoadPatImmOff<i32, load, or_is_add, "LOAD_I32">; +defm : LoadPatImmOff<i64, load, or_is_add, "LOAD_I64">; +defm : LoadPatImmOff<f32, load, or_is_add, "LOAD_F32">; +defm : LoadPatImmOff<f64, load, or_is_add, "LOAD_F64">; -def : LoadPatOffsetOnly<i32, load, LOAD_I32>; -def : LoadPatOffsetOnly<i64, load, LOAD_I64>; -def : LoadPatOffsetOnly<f32, load, LOAD_F32>; -def : LoadPatOffsetOnly<f64, load, LOAD_F64>; +// Select loads with just a constant offset. 
+multiclass LoadPatOffsetOnly<ValueType ty, PatFrag kind, string inst> { + def : Pat<(ty (kind imm:$off)), + (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0))>, + Requires<[HasAddr32]>; + def : Pat<(ty (kind imm:$off)), + (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0))>, + Requires<[HasAddr64]>; +} -class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : - Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))), - (inst 0, tglobaladdr:$off, (CONST_I32 0))>, Requires<[IsNotPIC]>; +defm : LoadPatOffsetOnly<i32, load, "LOAD_I32">; +defm : LoadPatOffsetOnly<i64, load, "LOAD_I64">; +defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">; +defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">; + +multiclass LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> { + def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))), + (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>, + Requires<[IsNotPIC, HasAddr32]>; + def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))), + (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>, + Requires<[IsNotPIC, HasAddr64]>; +} -def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>; -def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>; -def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>; -def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>; +defm : LoadPatGlobalAddrOffOnly<i32, load, "LOAD_I32">; +defm : LoadPatGlobalAddrOffOnly<i64, load, "LOAD_I64">; +defm : LoadPatGlobalAddrOffOnly<f32, load, "LOAD_F32">; +defm : LoadPatGlobalAddrOffOnly<f64, load, "LOAD_F64">; // Extending load. -defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>; -defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>; -defm LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>; -defm LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>; -defm LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>; -defm LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>; -defm LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>; -defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>; -defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>; -defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>; +defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c, []>; +defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d, []>; +defm LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e, []>; +defm LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f, []>; +defm LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30, []>; +defm LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31, []>; +defm LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32, []>; +defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>; +defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>; +defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>; // Select extending loads with no constant offset. 
-def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>; -def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>; -def : LoadPatNoOffset<i32, sextloadi16, LOAD16_S_I32>; -def : LoadPatNoOffset<i32, zextloadi16, LOAD16_U_I32>; -def : LoadPatNoOffset<i64, sextloadi8, LOAD8_S_I64>; -def : LoadPatNoOffset<i64, zextloadi8, LOAD8_U_I64>; -def : LoadPatNoOffset<i64, sextloadi16, LOAD16_S_I64>; -def : LoadPatNoOffset<i64, zextloadi16, LOAD16_U_I64>; -def : LoadPatNoOffset<i64, sextloadi32, LOAD32_S_I64>; -def : LoadPatNoOffset<i64, zextloadi32, LOAD32_U_I64>; +defm : LoadPatNoOffset<i32, sextloadi8, "LOAD8_S_I32">; +defm : LoadPatNoOffset<i32, zextloadi8, "LOAD8_U_I32">; +defm : LoadPatNoOffset<i32, sextloadi16, "LOAD16_S_I32">; +defm : LoadPatNoOffset<i32, zextloadi16, "LOAD16_U_I32">; +defm : LoadPatNoOffset<i64, sextloadi8, "LOAD8_S_I64">; +defm : LoadPatNoOffset<i64, zextloadi8, "LOAD8_U_I64">; +defm : LoadPatNoOffset<i64, sextloadi16, "LOAD16_S_I64">; +defm : LoadPatNoOffset<i64, zextloadi16, "LOAD16_U_I64">; +defm : LoadPatNoOffset<i64, sextloadi32, "LOAD32_S_I64">; +defm : LoadPatNoOffset<i64, zextloadi32, "LOAD32_U_I64">; // Select extending loads with a constant offset. -def : LoadPatImmOff<i32, sextloadi8, regPlusImm, LOAD8_S_I32>; -def : LoadPatImmOff<i32, zextloadi8, regPlusImm, LOAD8_U_I32>; -def : LoadPatImmOff<i32, sextloadi16, regPlusImm, LOAD16_S_I32>; -def : LoadPatImmOff<i32, zextloadi16, regPlusImm, LOAD16_U_I32>; -def : LoadPatImmOff<i64, sextloadi8, regPlusImm, LOAD8_S_I64>; -def : LoadPatImmOff<i64, zextloadi8, regPlusImm, LOAD8_U_I64>; -def : LoadPatImmOff<i64, sextloadi16, regPlusImm, LOAD16_S_I64>; -def : LoadPatImmOff<i64, zextloadi16, regPlusImm, LOAD16_U_I64>; -def : LoadPatImmOff<i64, sextloadi32, regPlusImm, LOAD32_S_I64>; -def : LoadPatImmOff<i64, zextloadi32, regPlusImm, LOAD32_U_I64>; - -def : LoadPatImmOff<i32, sextloadi8, or_is_add, LOAD8_S_I32>; -def : LoadPatImmOff<i32, zextloadi8, or_is_add, LOAD8_U_I32>; -def : LoadPatImmOff<i32, sextloadi16, or_is_add, LOAD16_S_I32>; -def : LoadPatImmOff<i32, zextloadi16, or_is_add, LOAD16_U_I32>; -def : LoadPatImmOff<i64, sextloadi8, or_is_add, LOAD8_S_I64>; -def : LoadPatImmOff<i64, zextloadi8, or_is_add, LOAD8_U_I64>; -def : LoadPatImmOff<i64, sextloadi16, or_is_add, LOAD16_S_I64>; -def : LoadPatImmOff<i64, zextloadi16, or_is_add, LOAD16_U_I64>; -def : LoadPatImmOff<i64, sextloadi32, or_is_add, LOAD32_S_I64>; -def : LoadPatImmOff<i64, zextloadi32, or_is_add, LOAD32_U_I64>; +defm : LoadPatImmOff<i32, sextloadi8, regPlusImm, "LOAD8_S_I32">; +defm : LoadPatImmOff<i32, zextloadi8, regPlusImm, "LOAD8_U_I32">; +defm : LoadPatImmOff<i32, sextloadi16, regPlusImm, "LOAD16_S_I32">; +defm : LoadPatImmOff<i32, zextloadi16, regPlusImm, "LOAD16_U_I32">; +defm : LoadPatImmOff<i64, sextloadi8, regPlusImm, "LOAD8_S_I64">; +defm : LoadPatImmOff<i64, zextloadi8, regPlusImm, "LOAD8_U_I64">; +defm : LoadPatImmOff<i64, sextloadi16, regPlusImm, "LOAD16_S_I64">; +defm : LoadPatImmOff<i64, zextloadi16, regPlusImm, "LOAD16_U_I64">; +defm : LoadPatImmOff<i64, sextloadi32, regPlusImm, "LOAD32_S_I64">; +defm : LoadPatImmOff<i64, zextloadi32, regPlusImm, "LOAD32_U_I64">; + +defm : LoadPatImmOff<i32, sextloadi8, or_is_add, "LOAD8_S_I32">; +defm : LoadPatImmOff<i32, zextloadi8, or_is_add, "LOAD8_U_I32">; +defm : LoadPatImmOff<i32, sextloadi16, or_is_add, "LOAD16_S_I32">; +defm : LoadPatImmOff<i32, zextloadi16, or_is_add, "LOAD16_U_I32">; +defm : LoadPatImmOff<i64, sextloadi8, or_is_add, "LOAD8_S_I64">; +defm : LoadPatImmOff<i64, zextloadi8, or_is_add, 
"LOAD8_U_I64">; +defm : LoadPatImmOff<i64, sextloadi16, or_is_add, "LOAD16_S_I64">; +defm : LoadPatImmOff<i64, zextloadi16, or_is_add, "LOAD16_U_I64">; +defm : LoadPatImmOff<i64, sextloadi32, or_is_add, "LOAD32_S_I64">; +defm : LoadPatImmOff<i64, zextloadi32, or_is_add, "LOAD32_U_I64">; // Select extending loads with just a constant offset. -def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>; -def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>; -def : LoadPatOffsetOnly<i32, sextloadi16, LOAD16_S_I32>; -def : LoadPatOffsetOnly<i32, zextloadi16, LOAD16_U_I32>; - -def : LoadPatOffsetOnly<i64, sextloadi8, LOAD8_S_I64>; -def : LoadPatOffsetOnly<i64, zextloadi8, LOAD8_U_I64>; -def : LoadPatOffsetOnly<i64, sextloadi16, LOAD16_S_I64>; -def : LoadPatOffsetOnly<i64, zextloadi16, LOAD16_U_I64>; -def : LoadPatOffsetOnly<i64, sextloadi32, LOAD32_S_I64>; -def : LoadPatOffsetOnly<i64, zextloadi32, LOAD32_U_I64>; - -def : LoadPatGlobalAddrOffOnly<i32, sextloadi8, LOAD8_S_I32>; -def : LoadPatGlobalAddrOffOnly<i32, zextloadi8, LOAD8_U_I32>; -def : LoadPatGlobalAddrOffOnly<i32, sextloadi16, LOAD16_S_I32>; -def : LoadPatGlobalAddrOffOnly<i32, zextloadi16, LOAD16_U_I32>; -def : LoadPatGlobalAddrOffOnly<i64, sextloadi8, LOAD8_S_I64>; -def : LoadPatGlobalAddrOffOnly<i64, zextloadi8, LOAD8_U_I64>; -def : LoadPatGlobalAddrOffOnly<i64, sextloadi16, LOAD16_S_I64>; -def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>; -def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>; -def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>; +defm : LoadPatOffsetOnly<i32, sextloadi8, "LOAD8_S_I32">; +defm : LoadPatOffsetOnly<i32, zextloadi8, "LOAD8_U_I32">; +defm : LoadPatOffsetOnly<i32, sextloadi16, "LOAD16_S_I32">; +defm : LoadPatOffsetOnly<i32, zextloadi16, "LOAD16_U_I32">; + +defm : LoadPatOffsetOnly<i64, sextloadi8, "LOAD8_S_I64">; +defm : LoadPatOffsetOnly<i64, zextloadi8, "LOAD8_U_I64">; +defm : LoadPatOffsetOnly<i64, sextloadi16, "LOAD16_S_I64">; +defm : LoadPatOffsetOnly<i64, zextloadi16, "LOAD16_U_I64">; +defm : LoadPatOffsetOnly<i64, sextloadi32, "LOAD32_S_I64">; +defm : LoadPatOffsetOnly<i64, zextloadi32, "LOAD32_U_I64">; + +defm : LoadPatGlobalAddrOffOnly<i32, sextloadi8, "LOAD8_S_I32">; +defm : LoadPatGlobalAddrOffOnly<i32, zextloadi8, "LOAD8_U_I32">; +defm : LoadPatGlobalAddrOffOnly<i32, sextloadi16, "LOAD16_S_I32">; +defm : LoadPatGlobalAddrOffOnly<i32, zextloadi16, "LOAD16_U_I32">; +defm : LoadPatGlobalAddrOffOnly<i64, sextloadi8, "LOAD8_S_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, zextloadi8, "LOAD8_U_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, sextloadi16, "LOAD16_S_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, zextloadi16, "LOAD16_U_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, sextloadi32, "LOAD32_S_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, zextloadi32, "LOAD32_U_I64">; // Resolve "don't care" extending loads to zero-extending loads. This is // somewhat arbitrary, but zero-extending is conceptually simpler. // Select "don't care" extending loads with no constant offset. 
-def : LoadPatNoOffset<i32, extloadi8, LOAD8_U_I32>; -def : LoadPatNoOffset<i32, extloadi16, LOAD16_U_I32>; -def : LoadPatNoOffset<i64, extloadi8, LOAD8_U_I64>; -def : LoadPatNoOffset<i64, extloadi16, LOAD16_U_I64>; -def : LoadPatNoOffset<i64, extloadi32, LOAD32_U_I64>; +defm : LoadPatNoOffset<i32, extloadi8, "LOAD8_U_I32">; +defm : LoadPatNoOffset<i32, extloadi16, "LOAD16_U_I32">; +defm : LoadPatNoOffset<i64, extloadi8, "LOAD8_U_I64">; +defm : LoadPatNoOffset<i64, extloadi16, "LOAD16_U_I64">; +defm : LoadPatNoOffset<i64, extloadi32, "LOAD32_U_I64">; // Select "don't care" extending loads with a constant offset. -def : LoadPatImmOff<i32, extloadi8, regPlusImm, LOAD8_U_I32>; -def : LoadPatImmOff<i32, extloadi16, regPlusImm, LOAD16_U_I32>; -def : LoadPatImmOff<i64, extloadi8, regPlusImm, LOAD8_U_I64>; -def : LoadPatImmOff<i64, extloadi16, regPlusImm, LOAD16_U_I64>; -def : LoadPatImmOff<i64, extloadi32, regPlusImm, LOAD32_U_I64>; -def : LoadPatImmOff<i32, extloadi8, or_is_add, LOAD8_U_I32>; -def : LoadPatImmOff<i32, extloadi16, or_is_add, LOAD16_U_I32>; -def : LoadPatImmOff<i64, extloadi8, or_is_add, LOAD8_U_I64>; -def : LoadPatImmOff<i64, extloadi16, or_is_add, LOAD16_U_I64>; -def : LoadPatImmOff<i64, extloadi32, or_is_add, LOAD32_U_I64>; +defm : LoadPatImmOff<i32, extloadi8, regPlusImm, "LOAD8_U_I32">; +defm : LoadPatImmOff<i32, extloadi16, regPlusImm, "LOAD16_U_I32">; +defm : LoadPatImmOff<i64, extloadi8, regPlusImm, "LOAD8_U_I64">; +defm : LoadPatImmOff<i64, extloadi16, regPlusImm, "LOAD16_U_I64">; +defm : LoadPatImmOff<i64, extloadi32, regPlusImm, "LOAD32_U_I64">; +defm : LoadPatImmOff<i32, extloadi8, or_is_add, "LOAD8_U_I32">; +defm : LoadPatImmOff<i32, extloadi16, or_is_add, "LOAD16_U_I32">; +defm : LoadPatImmOff<i64, extloadi8, or_is_add, "LOAD8_U_I64">; +defm : LoadPatImmOff<i64, extloadi16, or_is_add, "LOAD16_U_I64">; +defm : LoadPatImmOff<i64, extloadi32, or_is_add, "LOAD32_U_I64">; // Select "don't care" extending loads with just a constant offset. 
-def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>; -def : LoadPatOffsetOnly<i32, extloadi16, LOAD16_U_I32>; -def : LoadPatOffsetOnly<i64, extloadi8, LOAD8_U_I64>; -def : LoadPatOffsetOnly<i64, extloadi16, LOAD16_U_I64>; -def : LoadPatOffsetOnly<i64, extloadi32, LOAD32_U_I64>; -def : LoadPatGlobalAddrOffOnly<i32, extloadi8, LOAD8_U_I32>; -def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>; -def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>; -def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>; -def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>; +defm : LoadPatOffsetOnly<i32, extloadi8, "LOAD8_U_I32">; +defm : LoadPatOffsetOnly<i32, extloadi16, "LOAD16_U_I32">; +defm : LoadPatOffsetOnly<i64, extloadi8, "LOAD8_U_I64">; +defm : LoadPatOffsetOnly<i64, extloadi16, "LOAD16_U_I64">; +defm : LoadPatOffsetOnly<i64, extloadi32, "LOAD32_U_I64">; +defm : LoadPatGlobalAddrOffOnly<i32, extloadi8, "LOAD8_U_I32">; +defm : LoadPatGlobalAddrOffOnly<i32, extloadi16, "LOAD16_U_I32">; +defm : LoadPatGlobalAddrOffOnly<i64, extloadi8, "LOAD8_U_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, extloadi16, "LOAD16_U_I64">; +defm : LoadPatGlobalAddrOffOnly<i64, extloadi32, "LOAD32_U_I64">; // Defines atomic and non-atomic stores, regular and truncating -multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> { +multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode, + list<Predicate> reqs = []> { let mayStore = 1, UseNamedOperandTable = 1 in - defm "" : I<(outs), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), - (outs), - (ins P2Align:$p2align, offset32_op:$off), [], - !strconcat(Name, "\t${off}(${addr})${p2align}, $val"), - !strconcat(Name, "\t${off}${p2align}"), Opcode>; + defm "_A32" : I<(outs), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), + (outs), + (ins P2Align:$p2align, offset32_op:$off), [], + !strconcat(Name, "\t${off}(${addr})${p2align}, $val"), + !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">, + Requires<reqs>; + let mayStore = 1, UseNamedOperandTable = 1 in + defm "_A64" : I<(outs), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val), + (outs), + (ins P2Align:$p2align, offset64_op:$off), [], + !strconcat(Name, "\t${off}(${addr})${p2align}, $val"), + !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">, + Requires<reqs>; } + // Basic store. // Note: WebAssembly inverts SelectionDAG's usual operand order. defm STORE_I32 : WebAssemblyStore<I32, "i32.store", 0x36>; @@ -224,43 +264,68 @@ defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>; defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>; // Select stores with no constant offset. 
-class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> : - Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>; +multiclass StorePatNoOffset<ValueType ty, PatFrag node, string inst> { + def : Pat<(node ty:$val, I32:$addr), + (!cast<NI>(inst # "_A32") 0, 0, I32:$addr, ty:$val)>, + Requires<[HasAddr32]>; + def : Pat<(node ty:$val, I64:$addr), + (!cast<NI>(inst # "_A64") 0, 0, I64:$addr, ty:$val)>, + Requires<[HasAddr64]>; +} -def : StorePatNoOffset<i32, store, STORE_I32>; -def : StorePatNoOffset<i64, store, STORE_I64>; -def : StorePatNoOffset<f32, store, STORE_F32>; -def : StorePatNoOffset<f64, store, STORE_F64>; +defm : StorePatNoOffset<i32, store, "STORE_I32">; +defm : StorePatNoOffset<i64, store, "STORE_I64">; +defm : StorePatNoOffset<f32, store, "STORE_F32">; +defm : StorePatNoOffset<f64, store, "STORE_F64">; // Select stores with a constant offset. -class StorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : - Pat<(kind ty:$val, (operand I32:$addr, imm:$off)), - (inst 0, imm:$off, I32:$addr, ty:$val)>; - -def : StorePatImmOff<i32, store, regPlusImm, STORE_I32>; -def : StorePatImmOff<i64, store, regPlusImm, STORE_I64>; -def : StorePatImmOff<f32, store, regPlusImm, STORE_F32>; -def : StorePatImmOff<f64, store, regPlusImm, STORE_F64>; -def : StorePatImmOff<i32, store, or_is_add, STORE_I32>; -def : StorePatImmOff<i64, store, or_is_add, STORE_I64>; -def : StorePatImmOff<f32, store, or_is_add, STORE_F32>; -def : StorePatImmOff<f64, store, or_is_add, STORE_F64>; +multiclass StorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, + string inst> { + def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)), + (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr, ty:$val)>, + Requires<[HasAddr32]>; + def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)), + (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr, ty:$val)>, + Requires<[HasAddr64]>; +} + +defm : StorePatImmOff<i32, store, regPlusImm, "STORE_I32">; +defm : StorePatImmOff<i64, store, regPlusImm, "STORE_I64">; +defm : StorePatImmOff<f32, store, regPlusImm, "STORE_F32">; +defm : StorePatImmOff<f64, store, regPlusImm, "STORE_F64">; +defm : StorePatImmOff<i32, store, or_is_add, "STORE_I32">; +defm : StorePatImmOff<i64, store, or_is_add, "STORE_I64">; +defm : StorePatImmOff<f32, store, or_is_add, "STORE_F32">; +defm : StorePatImmOff<f64, store, or_is_add, "STORE_F64">; // Select stores with just a constant offset. 
-class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : - Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; -def : StorePatOffsetOnly<i32, store, STORE_I32>; -def : StorePatOffsetOnly<i64, store, STORE_I64>; -def : StorePatOffsetOnly<f32, store, STORE_F32>; -def : StorePatOffsetOnly<f64, store, STORE_F64>; - -class StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : - Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), - (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, Requires<[IsNotPIC]>; -def : StorePatGlobalAddrOffOnly<i32, store, STORE_I32>; -def : StorePatGlobalAddrOffOnly<i64, store, STORE_I64>; -def : StorePatGlobalAddrOffOnly<f32, store, STORE_F32>; -def : StorePatGlobalAddrOffOnly<f64, store, STORE_F64>; +multiclass StorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> { + def : Pat<(kind ty:$val, imm:$off), + (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0), ty:$val)>, + Requires<[HasAddr32]>; + def : Pat<(kind ty:$val, imm:$off), + (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0), ty:$val)>, + Requires<[HasAddr64]>; +} +defm : StorePatOffsetOnly<i32, store, "STORE_I32">; +defm : StorePatOffsetOnly<i64, store, "STORE_I64">; +defm : StorePatOffsetOnly<f32, store, "STORE_F32">; +defm : StorePatOffsetOnly<f64, store, "STORE_F64">; + +multiclass StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> { + def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), + (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0), + ty:$val)>, + Requires<[IsNotPIC, HasAddr32]>; + def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), + (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0), + ty:$val)>, + Requires<[IsNotPIC, HasAddr64]>; +} +defm : StorePatGlobalAddrOffOnly<i32, store, "STORE_I32">; +defm : StorePatGlobalAddrOffOnly<i64, store, "STORE_I64">; +defm : StorePatGlobalAddrOffOnly<f32, store, "STORE_F32">; +defm : StorePatGlobalAddrOffOnly<f64, store, "STORE_F64">; // Truncating store. defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>; @@ -270,51 +335,54 @@ defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>; defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>; // Select truncating stores with no constant offset. -def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>; -def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>; -def : StorePatNoOffset<i64, truncstorei8, STORE8_I64>; -def : StorePatNoOffset<i64, truncstorei16, STORE16_I64>; -def : StorePatNoOffset<i64, truncstorei32, STORE32_I64>; +defm : StorePatNoOffset<i32, truncstorei8, "STORE8_I32">; +defm : StorePatNoOffset<i32, truncstorei16, "STORE16_I32">; +defm : StorePatNoOffset<i64, truncstorei8, "STORE8_I64">; +defm : StorePatNoOffset<i64, truncstorei16, "STORE16_I64">; +defm : StorePatNoOffset<i64, truncstorei32, "STORE32_I64">; // Select truncating stores with a constant offset. 
-def : StorePatImmOff<i32, truncstorei8, regPlusImm, STORE8_I32>; -def : StorePatImmOff<i32, truncstorei16, regPlusImm, STORE16_I32>; -def : StorePatImmOff<i64, truncstorei8, regPlusImm, STORE8_I64>; -def : StorePatImmOff<i64, truncstorei16, regPlusImm, STORE16_I64>; -def : StorePatImmOff<i64, truncstorei32, regPlusImm, STORE32_I64>; -def : StorePatImmOff<i32, truncstorei8, or_is_add, STORE8_I32>; -def : StorePatImmOff<i32, truncstorei16, or_is_add, STORE16_I32>; -def : StorePatImmOff<i64, truncstorei8, or_is_add, STORE8_I64>; -def : StorePatImmOff<i64, truncstorei16, or_is_add, STORE16_I64>; -def : StorePatImmOff<i64, truncstorei32, or_is_add, STORE32_I64>; +defm : StorePatImmOff<i32, truncstorei8, regPlusImm, "STORE8_I32">; +defm : StorePatImmOff<i32, truncstorei16, regPlusImm, "STORE16_I32">; +defm : StorePatImmOff<i64, truncstorei8, regPlusImm, "STORE8_I64">; +defm : StorePatImmOff<i64, truncstorei16, regPlusImm, "STORE16_I64">; +defm : StorePatImmOff<i64, truncstorei32, regPlusImm, "STORE32_I64">; +defm : StorePatImmOff<i32, truncstorei8, or_is_add, "STORE8_I32">; +defm : StorePatImmOff<i32, truncstorei16, or_is_add, "STORE16_I32">; +defm : StorePatImmOff<i64, truncstorei8, or_is_add, "STORE8_I64">; +defm : StorePatImmOff<i64, truncstorei16, or_is_add, "STORE16_I64">; +defm : StorePatImmOff<i64, truncstorei32, or_is_add, "STORE32_I64">; // Select truncating stores with just a constant offset. -def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>; -def : StorePatOffsetOnly<i32, truncstorei16, STORE16_I32>; -def : StorePatOffsetOnly<i64, truncstorei8, STORE8_I64>; -def : StorePatOffsetOnly<i64, truncstorei16, STORE16_I64>; -def : StorePatOffsetOnly<i64, truncstorei32, STORE32_I64>; -def : StorePatGlobalAddrOffOnly<i32, truncstorei8, STORE8_I32>; -def : StorePatGlobalAddrOffOnly<i32, truncstorei16, STORE16_I32>; -def : StorePatGlobalAddrOffOnly<i64, truncstorei8, STORE8_I64>; -def : StorePatGlobalAddrOffOnly<i64, truncstorei16, STORE16_I64>; -def : StorePatGlobalAddrOffOnly<i64, truncstorei32, STORE32_I64>; - +defm : StorePatOffsetOnly<i32, truncstorei8, "STORE8_I32">; +defm : StorePatOffsetOnly<i32, truncstorei16, "STORE16_I32">; +defm : StorePatOffsetOnly<i64, truncstorei8, "STORE8_I64">; +defm : StorePatOffsetOnly<i64, truncstorei16, "STORE16_I64">; +defm : StorePatOffsetOnly<i64, truncstorei32, "STORE32_I64">; +defm : StorePatGlobalAddrOffOnly<i32, truncstorei8, "STORE8_I32">; +defm : StorePatGlobalAddrOffOnly<i32, truncstorei16, "STORE16_I32">; +defm : StorePatGlobalAddrOffOnly<i64, truncstorei8, "STORE8_I64">; +defm : StorePatGlobalAddrOffOnly<i64, truncstorei16, "STORE16_I64">; +defm : StorePatGlobalAddrOffOnly<i64, truncstorei32, "STORE32_I64">; + +multiclass MemoryOps<WebAssemblyRegClass rc, string B> { // Current memory size. -defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags), +defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags), (outs), (ins i32imm:$flags), - [(set I32:$dst, + [(set rc:$dst, (int_wasm_memory_size (i32 imm:$flags)))], "memory.size\t$dst, $flags", "memory.size\t$flags", - 0x3f>, - Requires<[HasAddr32]>; + 0x3f>; // Grow memory. 
-defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta), +defm MEMORY_GROW_A#B : I<(outs rc:$dst), (ins i32imm:$flags, rc:$delta), (outs), (ins i32imm:$flags), - [(set I32:$dst, + [(set rc:$dst, (int_wasm_memory_grow (i32 imm:$flags), - I32:$delta))], + rc:$delta))], "memory.grow\t$dst, $flags, $delta", - "memory.grow\t$flags", 0x40>, - Requires<[HasAddr32]>; + "memory.grow\t$flags", 0x40>; +} + +defm : MemoryOps<I32, "32">; +defm : MemoryOps<I64, "64">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index afe89de60b361..14d723750f07d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// WebAssembly refence type operand codegen constructs. +/// WebAssembly reference type operand codegen constructs. /// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 64033c993e3f9..4f3da2f35c61d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -40,119 +40,150 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; //===----------------------------------------------------------------------===// // Load: v128.load -let mayLoad = 1, UseNamedOperandTable = 1 in -defm LOAD_V128 : +let mayLoad = 1, UseNamedOperandTable = 1 in { +defm LOAD_V128_A32 : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "v128.load\t$dst, ${off}(${addr})$p2align", "v128.load\t$off$p2align", 0>; +defm LOAD_V128_A64 : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; +} // Def load and store patterns from WebAssemblyInstrMemory.td for vector types foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -def : LoadPatNoOffset<vec_t, load, LOAD_V128>; -def : LoadPatImmOff<vec_t, load, regPlusImm, LOAD_V128>; -def : LoadPatImmOff<vec_t, load, or_is_add, LOAD_V128>; -def : LoadPatOffsetOnly<vec_t, load, LOAD_V128>; -def : LoadPatGlobalAddrOffOnly<vec_t, load, LOAD_V128>; +defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">; +defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">; +defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">; +defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">; +defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">; } // vNxM.load_splat multiclass SIMDLoadSplat<string vec, bits<32> simdop> { - let mayLoad = 1, UseNamedOperandTable = 1, - Predicates = [HasUnimplementedSIMD128] in - defm LOAD_SPLAT_#vec : - SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - (outs), (ins P2Align:$p2align, offset32_op:$off), [], + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm LOAD_SPLAT_#vec#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), + (ins P2Align:$p2align, offset32_op:$off), [], + vec#".load_splat\t$dst, ${off}(${addr})$p2align", + vec#".load_splat\t$off$p2align", simdop>; + defm LOAD_SPLAT_#vec#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, 
offset64_op:$off, I64:$addr), + (outs), + (ins P2Align:$p2align, offset64_op:$off), [], vec#".load_splat\t$dst, ${off}(${addr})$p2align", vec#".load_splat\t$off$p2align", simdop>; + } } -defm "" : SIMDLoadSplat<"v8x16", 194>; -defm "" : SIMDLoadSplat<"v16x8", 195>; -defm "" : SIMDLoadSplat<"v32x4", 196>; -defm "" : SIMDLoadSplat<"v64x2", 197>; +defm "" : SIMDLoadSplat<"v8x16", 7>; +defm "" : SIMDLoadSplat<"v16x8", 8>; +defm "" : SIMDLoadSplat<"v32x4", 9>; +defm "" : SIMDLoadSplat<"v64x2", 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; -let Predicates = [HasUnimplementedSIMD128] in foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { -def : LoadPatNoOffset<!cast<ValueType>(args[0]), - load_splat, - !cast<NI>("LOAD_SPLAT_"#args[1])>; -def : LoadPatImmOff<!cast<ValueType>(args[0]), - load_splat, - regPlusImm, - !cast<NI>("LOAD_SPLAT_"#args[1])>; -def : LoadPatImmOff<!cast<ValueType>(args[0]), - load_splat, - or_is_add, - !cast<NI>("LOAD_SPLAT_"#args[1])>; -def : LoadPatOffsetOnly<!cast<ValueType>(args[0]), - load_splat, - !cast<NI>("LOAD_SPLAT_"#args[1])>; -def : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]), - load_splat, - !cast<NI>("LOAD_SPLAT_"#args[1])>; +defm : LoadPatNoOffset<!cast<ValueType>(args[0]), + load_splat, + "LOAD_SPLAT_"#args[1]>; +defm : LoadPatImmOff<!cast<ValueType>(args[0]), + load_splat, + regPlusImm, + "LOAD_SPLAT_"#args[1]>; +defm : LoadPatImmOff<!cast<ValueType>(args[0]), + load_splat, + or_is_add, + "LOAD_SPLAT_"#args[1]>; +defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]), + load_splat, + "LOAD_SPLAT_"#args[1]>; +defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]), + load_splat, + "LOAD_SPLAT_"#args[1]>; } // Load and extend multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> { - let mayLoad = 1, UseNamedOperandTable = 1, - Predicates = [HasUnimplementedSIMD128] in { - defm LOAD_EXTEND_S_#vec_t : - SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm LOAD_EXTEND_S_#vec_t#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], name#"_s\t$dst, ${off}(${addr})$p2align", name#"_s\t$off$p2align", simdop>; - defm LOAD_EXTEND_U_#vec_t : - SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + defm LOAD_EXTEND_U_#vec_t#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], name#"_u\t$dst, ${off}(${addr})$p2align", name#"_u\t$off$p2align", !add(simdop, 1)>; + defm LOAD_EXTEND_S_#vec_t#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + name#"_s\t$dst, ${off}(${addr})$p2align", + name#"_s\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec_t#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + name#"_u\t$dst, ${off}(${addr})$p2align", + name#"_u\t$off$p2align", !add(simdop, 1)>; } } -defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 210>; -defm "" : SIMDLoadExtend<v4i32, 
"i32x4.load16x4", 212>; -defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 214>; +defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>; +defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>; +defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>; -let Predicates = [HasUnimplementedSIMD128] in foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in foreach exts = [["sextloadv", "_S"], ["zextloadv", "_U"], ["extloadv", "_U"]] in { -def : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]), - !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; -def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm, - !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; -def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add, - !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; -def : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), - !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; -def : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), - !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]), + "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm, + "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add, + "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), + "LOAD_EXTEND"#exts[1]#"_"#types[0]>; +defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), + "LOAD_EXTEND"#exts[1]#"_"#types[0]>; } // Store: v128.store -let mayStore = 1, UseNamedOperandTable = 1 in -defm STORE_V128 : +let mayStore = 1, UseNamedOperandTable = 1 in { +defm STORE_V128_A32 : SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "v128.store\t${off}(${addr})$p2align, $vec", - "v128.store\t$off$p2align", 1>; - + "v128.store\t$off$p2align", 11>; +defm STORE_V128_A64 : + SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 11>; +} foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { // Def load and store patterns from WebAssemblyInstrMemory.td for vector types -def : StorePatNoOffset<vec_t, store, STORE_V128>; -def : StorePatImmOff<vec_t, store, regPlusImm, STORE_V128>; -def : StorePatImmOff<vec_t, store, or_is_add, STORE_V128>; -def : StorePatOffsetOnly<vec_t, store, STORE_V128>; -def : StorePatGlobalAddrOffOnly<vec_t, store, STORE_V128>; +defm : StorePatNoOffset<vec_t, store, "STORE_V128">; +defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">; +defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">; +defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">; +defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">; } //===----------------------------------------------------------------------===// @@ -166,7 +197,7 @@ multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> { defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, [(set V128:$dst, (vec_t pat))], "v128.const\t$dst, "#args, - "v128.const\t"#args, 2>; + "v128.const\t"#args, 12>; } defm "" : ConstVec<v16i8, @@ -244,7 +275,7 @@ defm SHUFFLE : "v8x16.shuffle\t"# "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", - 3>; + 13>; // Shuffles after 
custom lowering def wasm_shuffle_t : SDTypeProfile<1, 18, []>; @@ -273,12 +304,11 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), // Swizzle lanes: v8x16.swizzle def wasm_swizzle_t : SDTypeProfile<1, 2, []>; def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; -let Predicates = [HasUnimplementedSIMD128] in defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), (SWIZZLE V128:$src, V128:$mask)>; @@ -298,19 +328,17 @@ def splat16 : PatFrag<(ops node:$x), (build_vector multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t, PatFrag splat_pat, bits<32> simdop> { - // Prefer splats over v128.const for const splats (65 is lowest that works) - let AddedComplexity = 65 in defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins), [(set (vec_t V128:$dst), (splat_pat reg_t:$x))], vec#".splat\t$dst, $x", vec#".splat", simdop>; } -defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>; -defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>; -defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>; -defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>; -defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>; -defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>; +defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>; +defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>; +defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>; +defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>; +defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>; +defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>; // scalar_to_vector leaves high lanes undefined, so can be a splat class ScalarSplatPat<ValueType vec_t, ValueType lane_t, @@ -330,82 +358,49 @@ def : ScalarSplatPat<v2f64, f64, F64>; //===----------------------------------------------------------------------===// // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u -multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t, - WebAssemblyRegClass reg_t, bits<32> simdop, - string suffix = "", SDNode extract = vector_extract> { +multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t, + bits<32> simdop, string suffix = ""> { defm EXTRACT_LANE_#vec_t#suffix : SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx), - (outs), (ins vec_i8imm_op:$idx), - [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))], + (outs), (ins vec_i8imm_op:$idx), [], vec#".extract_lane"#suffix#"\t$dst, $vec, $idx", vec#".extract_lane"#suffix#"\t$idx", simdop>; } -multiclass ExtractPat<ValueType lane_t, int mask> { - def _s : PatFrag<(ops node:$vec, node:$idx), - (i32 (sext_inreg - (i32 (vector_extract - node:$vec, - node:$idx - )), - lane_t - ))>; - def _u : PatFrag<(ops node:$vec, node:$idx), - (i32 (and - (i32 (vector_extract - node:$vec, - node:$idx - )), - (i32 mask) - ))>; -} - -defm extract_i8x16 : ExtractPat<i8, 0xff>; -defm extract_i16x8 : ExtractPat<i16, 0xffff>; - -multiclass ExtractLaneExtended<string sign, bits<32> baseInst> { - defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign, - !cast<PatFrag>("extract_i8x16"#sign)>; - defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign, - !cast<PatFrag>("extract_i16x8"#sign)>; -} - -defm "" : ExtractLaneExtended<"_s", 5>; -let 
Predicates = [HasUnimplementedSIMD128] in -defm "" : ExtractLaneExtended<"_u", 6>; -defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>; -defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>; -defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>; -defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>; - -// It would be more conventional to use unsigned extracts, but v8 -// doesn't implement them yet -def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))), - (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>; -def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))), - (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>; - -// Lower undef lane indices to zero -def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)), - (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; -def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)), - (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; -def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)), - (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; -def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)), - (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; -def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8), - (EXTRACT_LANE_v16i8_s V128:$vec, 0)>; -def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16), - (EXTRACT_LANE_v8i16_s V128:$vec, 0)>; -def : Pat<(vector_extract (v4i32 V128:$vec), undef), - (EXTRACT_LANE_v4i32 V128:$vec, 0)>; -def : Pat<(vector_extract (v2i64 V128:$vec), undef), - (EXTRACT_LANE_v2i64 V128:$vec, 0)>; -def : Pat<(vector_extract (v4f32 V128:$vec), undef), - (EXTRACT_LANE_v4f32 V128:$vec, 0)>; -def : Pat<(vector_extract (v2f64 V128:$vec), undef), - (EXTRACT_LANE_v2f64 V128:$vec, 0)>; +defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">; +defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">; +defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">; +defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">; +defm "" : ExtractLane<v4i32, "i32x4", I32, 27>; +defm "" : ExtractLane<v2i64, "i64x2", I64, 29>; +defm "" : ExtractLane<v4f32, "f32x4", F32, 31>; +defm "" : ExtractLane<v2f64, "f64x2", F64, 33>; + +def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), + (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), + (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)), + (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)), + (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)), + (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)), + (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>; + +def : Pat< + (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8), + (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>; +def : Pat< + (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)), + (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; +def : Pat< + (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16), + (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>; +def : Pat< + (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)), + (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; // Replace lane value: replace_lane multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t, @@ -420,12 +415,12 @@ multiclass 
ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t, vec#".replace_lane\t$idx", simdop>; } -defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>; -defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>; -defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>; -defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>; -defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>; -defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>; +defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>; +defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>; +defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>; +defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>; +defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>; +defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>; // Lower undef lane indices to zero def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), @@ -471,35 +466,35 @@ multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> { // Equality: eq let isCommutable = 1 in { -defm EQ : SIMDConditionInt<"eq", SETEQ, 24>; -defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>; +defm EQ : SIMDConditionInt<"eq", SETEQ, 35>; +defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>; } // isCommutable = 1 // Non-equality: ne let isCommutable = 1 in { -defm NE : SIMDConditionInt<"ne", SETNE, 25>; -defm NE : SIMDConditionFP<"ne", SETUNE, 65>; +defm NE : SIMDConditionInt<"ne", SETNE, 36>; +defm NE : SIMDConditionFP<"ne", SETUNE, 66>; } // isCommutable = 1 // Less than: lt_s / lt_u / lt -defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>; -defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>; -defm LT : SIMDConditionFP<"lt", SETOLT, 66>; +defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>; +defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>; +defm LT : SIMDConditionFP<"lt", SETOLT, 67>; // Greater than: gt_s / gt_u / gt -defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>; -defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>; -defm GT : SIMDConditionFP<"gt", SETOGT, 67>; +defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>; +defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>; +defm GT : SIMDConditionFP<"gt", SETOGT, 68>; // Less than or equal: le_s / le_u / le -defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>; -defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>; -defm LE : SIMDConditionFP<"le", SETOLE, 68>; +defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>; +defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>; +defm LE : SIMDConditionFP<"le", SETOLE, 69>; // Greater than or equal: ge_s / ge_u / ge -defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>; -defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>; -defm GE : SIMDConditionFP<"ge", SETOGE, 69>; +defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>; +defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>; +defm GE : SIMDConditionFP<"ge", SETOGE, 70>; // Lower float comparisons that don't care about NaN to standard WebAssembly // float comparisons. 
These instructions are generated with nnan and in the @@ -548,19 +543,18 @@ multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name, // Bitwise logic: v128.not foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in -defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>; +defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>; // Bitwise logic: v128.and / v128.or / v128.xor let isCommutable = 1 in { -defm AND : SIMDBitwise<and, "and", 77>; -defm OR : SIMDBitwise<or, "or", 78>; -defm XOR : SIMDBitwise<xor, "xor", 79>; +defm AND : SIMDBitwise<and, "and", 78>; +defm OR : SIMDBitwise<or, "or", 80>; +defm XOR : SIMDBitwise<xor, "xor", 81>; } // isCommutable = 1 // Bitwise logic: v128.andnot def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; -let Predicates = [HasUnimplementedSIMD128] in -defm ANDNOT : SIMDBitwise<andnot, "andnot", 216>; +defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>; // Bitwise select: v128.bitselect foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in @@ -571,7 +565,7 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) )) )], - "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>; + "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; // Bitselect is equivalent to (c & v1) | (~c & v2) foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in @@ -586,9 +580,9 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>; - defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>; - defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>; - defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>; + defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; + defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; + defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; } multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name, @@ -600,22 +594,25 @@ multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name, multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> { defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>; - defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>; - defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>; - defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>; + defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>; + defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>; + defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>; } // Integer vector negation def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; +// Integer absolute value: abs +defm ABS : SIMDUnaryInt<abs, "abs", 96>; + // Integer negation: neg -defm NEG : SIMDUnaryInt<ivneg, "neg", 81>; +defm NEG : SIMDUnaryInt<ivneg, "neg", 97>; // Any lane true: any_true -defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>; +defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>; // All lanes true: all_true -defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>; +defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>; // Reductions already return 0 or 1, so and 1, setne 0, and seteq 1 // can be folded out @@ -639,109 +636,108 @@ def : Pat<(i32 (seteq (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; } +multiclass 
SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> { + defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), + [(set I32:$dst, + (i32 (int_wasm_bitmask (vec_t V128:$vec))) + )], + vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>; +} + +defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>; +defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>; +defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>; + //===----------------------------------------------------------------------===// // Bit shifts //===----------------------------------------------------------------------===// -multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec, - string name, bits<32> simdop> { +multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name, + bits<32> simdop> { defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins), - [(set (vec_t V128:$dst), - (node V128:$vec, (vec_t shift_vec)))], + [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))], vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>; } multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> { - defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>; - defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name, - !add(baseInst, 17)>; - defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name, - !add(baseInst, 34)>; - defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))), - name, !add(baseInst, 51)>; + defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>; + defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>; + defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>; + defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>; } -// Left shift by scalar: shl -defm SHL : SIMDShiftInt<shl, "shl", 84>; - -// Right shift by scalar: shr_s / shr_u -defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>; -defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>; - -// Truncate i64 shift operands to i32s, except if they are already i32s -foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in { -def : Pat<(v2i64 (shifts[0] - (v2i64 V128:$vec), - (v2i64 (splat2 (i64 (sext I32:$x)))) - )), - (v2i64 (shifts[1] (v2i64 V128:$vec), (i32 I32:$x)))>; -def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))), - (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>; -} - -// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping +// WebAssembly SIMD shifts are nonstandard in that the shift amount is +// an i32 rather than a vector, so they need custom nodes. 
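Concretely, the nonstandard part is that one scalar i32 count applies to every lane, with the count taken modulo the lane width; that is my reading of the SIMD proposal rather than code from this commit. A minimal executable sketch of i32x4.shl semantics:

#include <array>
#include <cstdint>

// Toy i32x4.shl: a single scalar count shifts all four lanes, and only the
// low bits of the count matter (count mod 32 for 32-bit lanes).
std::array<uint32_t, 4> i32x4_shl(std::array<uint32_t, 4> v, uint32_t count) {
  for (uint32_t &lane : v)
    lane <<= (count & 31);
  return v;
}

That scalar operand is why the wasm_shl / wasm_shr_s / wasm_shr_u nodes defined next carry an i32 where the target-independent ISD shifts would carry a whole vector.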
def wasm_shift_t : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>] >; def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>; def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>; def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>; -foreach shifts = [[wasm_shl, SHL_v2i64], - [wasm_shr_s, SHR_S_v2i64], - [wasm_shr_u, SHR_U_v2i64]] in -def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)), - (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>; + +// Left shift by scalar: shl +defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>; + +// Right shift by scalar: shr_s / shr_u +defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>; +defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>; //===----------------------------------------------------------------------===// // Integer binary arithmetic //===----------------------------------------------------------------------===// +multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; + defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; + defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; +} + multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>; - defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>; + defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>; } multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinaryIntSmall<node, name, baseInst>; - defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>; + defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>; } multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>; - defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>; + defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>; } // Integer addition: add / add_saturate_s / add_saturate_u let isCommutable = 1 in { -defm ADD : SIMDBinaryInt<add, "add", 87>; -defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>; -defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>; +defm ADD : SIMDBinaryInt<add, "add", 110>; +defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>; +defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>; } // isCommutable = 1 // Integer subtraction: sub / sub_saturate_s / sub_saturate_u -defm SUB : SIMDBinaryInt<sub, "sub", 90>; +defm SUB : SIMDBinaryInt<sub, "sub", 113>; defm SUB_SAT_S : - SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>; + SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>; defm SUB_SAT_U : - SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>; + SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>; // Integer multiplication: mul let isCommutable = 1 in -defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>; +defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>; // Integer min_s / min_u / max_s / max_u let isCommutable = 1 in { -defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 94>; -defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 95>; -defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 96>; -defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 97>; +defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>; +defm MIN_U : 
SIMDBinaryIntNoI64x2<umin, "min_u", 119>; +defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>; +defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>; } // isCommutable = 1 // Integer unsigned rounding average: avgr_u -let isCommutable = 1, Predicates = [HasUnimplementedSIMD128] in { -defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 217>; -defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 218>; +let isCommutable = 1 in { +defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>; +defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>; } def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), @@ -749,12 +745,12 @@ def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), "return N->getFlags().hasNoUnsignedWrap();">; foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in -def : Pat<(srl +def : Pat<(wasm_shr_u (add_nuw (add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)), (nodes[1] (i32 1)) ), - (nodes[0] (nodes[1] (i32 1))) + (i32 1) ), (!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>; @@ -763,7 +759,7 @@ let isCommutable = 1 in defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))], "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s", - 219>; + 180>; //===----------------------------------------------------------------------===// // Floating-point unary arithmetic @@ -771,18 +767,27 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>; - defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>; + defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>; } // Absolute value: abs -defm ABS : SIMDUnaryFP<fabs, "abs", 149>; +defm ABS : SIMDUnaryFP<fabs, "abs", 224>; // Negation: neg -defm NEG : SIMDUnaryFP<fneg, "neg", 150>; +defm NEG : SIMDUnaryFP<fneg, "neg", 225>; // Square root: sqrt -let Predicates = [HasUnimplementedSIMD128] in -defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>; +defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>; + +// Rounding: ceil, floor, trunc, nearest +defm CEIL : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>; +defm FLOOR : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>; +defm TRUNC: SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>; +defm NEAREST: SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>; +defm CEIL : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>; +defm FLOOR : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>; +defm TRUNC: SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>; +defm NEAREST: SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>; //===----------------------------------------------------------------------===// // Floating-point binary arithmetic @@ -790,29 +795,34 @@ defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>; multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> { defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>; - defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>; + defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>; } // Addition: add let isCommutable = 1 in -defm ADD : SIMDBinaryFP<fadd, "add", 154>; +defm ADD : SIMDBinaryFP<fadd, "add", 228>; // Subtraction: sub -defm SUB : SIMDBinaryFP<fsub, "sub", 155>; +defm SUB : SIMDBinaryFP<fsub, "sub", 229>; // Multiplication: mul let 
isCommutable = 1 in -defm MUL : SIMDBinaryFP<fmul, "mul", 156>; +defm MUL : SIMDBinaryFP<fmul, "mul", 230>; // Division: div -let Predicates = [HasUnimplementedSIMD128] in -defm DIV : SIMDBinaryFP<fdiv, "div", 157>; +defm DIV : SIMDBinaryFP<fdiv, "div", 231>; // NaN-propagating minimum: min -defm MIN : SIMDBinaryFP<fminimum, "min", 158>; +defm MIN : SIMDBinaryFP<fminimum, "min", 232>; // NaN-propagating maximum: max -defm MAX : SIMDBinaryFP<fmaximum, "max", 159>; +defm MAX : SIMDBinaryFP<fmaximum, "max", 233>; + +// Pseudo-minimum: pmin +defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>; + +// Pseudo-maximum: pmax +defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>; //===----------------------------------------------------------------------===// // Conversions @@ -826,17 +836,13 @@ multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op, name#"\t$dst, $vec", name, simdop>; } -// Integer to floating point: convert -defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>; -defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>; -defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>; -defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>; - // Floating point to integer with saturation: trunc_sat -defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>; -defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>; -defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>; -defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>; +defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>; +defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>; + +// Integer to floating point: convert +defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>; +defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>; // Widening operations multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg, @@ -851,8 +857,8 @@ multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg, vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>; } -defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 202>; -defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 206>; +defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>; +defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>; // Narrowing operations multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg, @@ -871,18 +877,14 @@ multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg, !add(baseInst, 1)>; } -defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 198>; -defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 200>; +defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>; +defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>; // Lower llvm.wasm.trunc.saturate.* to saturating instructions def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>; -def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))), - (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>; -def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))), - (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>; // Bitcasts are nops // Matching bitcast t1 to t1 
causes strange errors, so avoid repeating types
@@ -914,5 +916,5 @@ multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
           vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
 }
 
-defm "" : SIMDQFM<v4f32, "f32x4", 0x98>;
-defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>;
+defm "" : SIMDQFM<v4f32, "f32x4", 252>;
+defm "" : SIMDQFM<v2f64, "f64x2", 254>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 75d04252cbe99..346938daf1aa2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/WasmEHFuncInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "wasm-late-eh-prepare"
@@ -31,12 +32,16 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
+  void recordCatchRetBBs(MachineFunction &MF);
   bool addCatches(MachineFunction &MF);
   bool replaceFuncletReturns(MachineFunction &MF);
   bool removeUnnecessaryUnreachables(MachineFunction &MF);
   bool addExceptionExtraction(MachineFunction &MF);
   bool restoreStackPointer(MachineFunction &MF);
+  MachineBasicBlock *getMatchingEHPad(MachineInstr *MI);
+
+  SmallSet<MachineBasicBlock *, 8> CatchRetBBs;
 
 public:
   static char ID; // Pass identification, replacement for typeid
   WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
@@ -57,7 +62,8 @@ FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
 // possible search paths should be the same.
 // Returns nullptr in case it does not find any EH pad in the search, or finds
 // multiple different EH pads.
-static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
+MachineBasicBlock *
+WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) {
   MachineFunction *MF = MI->getParent()->getParent();
   SmallVector<MachineBasicBlock *, 2> WL;
   SmallPtrSet<MachineBasicBlock *, 2> Visited;
@@ -76,7 +82,9 @@ static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
     }
     if (MBB == &MF->front())
       return nullptr;
-    WL.append(MBB->pred_begin(), MBB->pred_end());
+    for (auto *Pred : MBB->predecessors())
+      if (!CatchRetBBs.count(Pred)) // We don't go into child scopes
+        WL.push_back(Pred);
   }
   return EHPad;
 }
@@ -110,6 +118,7 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   if (MF.getFunction().hasPersonalityFn()) {
+    recordCatchRetBBs(MF);
     Changed |= addCatches(MF);
     Changed |= replaceFuncletReturns(MF);
   }
@@ -121,6 +130,21 @@
   return Changed;
 }
 
+// Record which BBs end with a 'CATCHRET' instruction, because these will be
+// replaced with BRs later. This set of 'CATCHRET' BBs is needed by the
+// 'getMatchingEHPad' function.
+void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) {
+  CatchRetBBs.clear();
+  for (auto &MBB : MF) {
+    auto Pos = MBB.getFirstTerminator();
+    if (Pos == MBB.end())
+      continue;
+    MachineInstr *TI = &*Pos;
+    if (TI->getOpcode() == WebAssembly::CATCHRET)
+      CatchRetBBs.insert(&MBB);
+  }
+}
+
 // Add catch instruction to beginning of catchpads and cleanuppads.
bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) { bool Changed = false; @@ -343,7 +367,7 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { "There is no __clang_call_terminate() function"); Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0); - BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID)) + BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL)) .addGlobalAddress(ClangCallTerminateFn) .addReg(Reg); BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE)); @@ -384,8 +408,8 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { ++InsertPos; if (InsertPos->getOpcode() == WebAssembly::CATCH) ++InsertPos; - FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos, - MBB.begin()->getDebugLoc()); + FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, + InsertPos, MBB.begin()->getDebugLoc()); } return Changed; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 4314aa6115492..01b3aa887738e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -191,7 +191,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); - MFI.stackifyVReg(Tmp); + MFI.stackifyVReg(MRI, Tmp); Cond = Tmp; Inverted = true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index d1f3acbd221e3..5fce4a600510b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -208,7 +208,8 @@ ///===----------------------------------------------------------------------===// #include "WebAssembly.h" -#include "llvm/IR/CallSite.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CommandLine.h" @@ -220,10 +221,10 @@ using namespace llvm; #define DEBUG_TYPE "wasm-lower-em-ehsjlj" static cl::list<std::string> - EHWhitelist("emscripten-cxx-exceptions-whitelist", + EHAllowlist("emscripten-cxx-exceptions-allowed", cl::desc("The list of function names in which Emscripten-style " "exception handling is enabled (see emscripten " - "EMSCRIPTEN_CATCHING_WHITELIST options)"), + "EMSCRIPTEN_CATCHING_ALLOWED options)"), cl::CommaSeparated); namespace { @@ -247,8 +248,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { DenseMap<int, Function *> FindMatchingCatches; // Map of <function signature string, invoke_ wrappers> StringMap<Function *> InvokeWrappers; - // Set of whitelisted function names for exception handling - std::set<std::string> EHWhitelistSet; + // Set of allowed function names for exception handling + std::set<std::string> EHAllowlistSet; StringRef getPassName() const override { return "WebAssembly Lower Emscripten Exceptions"; @@ -258,13 +259,13 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { bool runSjLjOnFunction(Function &F); Function *getFindMatchingCatch(Module &M, unsigned NumClauses); - template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI); - void 
wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw, + Value *wrapInvoke(CallBase *CI); + void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, BasicBlock *&EndBB); - template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI); + Function *getInvokeWrapper(CallBase *CI); - bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } + bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); } bool canLongjmp(Module &M, const Value *Callee) const; bool isEmAsmCall(Module &M, const Value *Callee) const; @@ -275,7 +276,7 @@ public: WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true) : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) { - EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end()); + EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end()); } bool runOnModule(Module &M) override; @@ -337,13 +338,31 @@ static std::string getSignature(FunctionType *FTy) { if (FTy->isVarArg()) OS << "_..."; Sig = OS.str(); - Sig.erase(remove_if(Sig, isspace), Sig.end()); + Sig.erase(remove_if(Sig, isSpace), Sig.end()); // When s2wasm parses .s file, a comma means the end of an argument. So a // mangled function name can contain any character but a comma. std::replace(Sig.begin(), Sig.end(), ',', '.'); return Sig; } +static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name, + Module *M) { + Function* F = Function::Create(Ty, GlobalValue::ExternalLinkage, Name, M); + // Tell the linker that this function is expected to be imported from the + // 'env' module. + if (!F->hasFnAttribute("wasm-import-module")) { + llvm::AttrBuilder B; + B.addAttribute("wasm-import-module", "env"); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + if (!F->hasFnAttribute("wasm-import-name")) { + llvm::AttrBuilder B; + B.addAttribute("wasm-import-name", F->getName()); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + return F; +} + // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. // This is because a landingpad instruction contains two more arguments, a // personality function and a cleanup bit, and __cxa_find_matching_catch_N @@ -357,9 +376,8 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy); FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); - Function *F = Function::Create( - FTy, GlobalValue::ExternalLinkage, - "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); + Function *F = getEmscriptenFunction( + FTy, "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); FindMatchingCatches[NumClauses] = F; return F; } @@ -371,15 +389,14 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, // %__THREW__.val = __THREW__; __THREW__ = 0; // Returns %__THREW__.val, which indicates whether an exception is thrown (or // whether longjmp occurred), for future use. -template <typename CallOrInvoke> -Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { +Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { LLVMContext &C = CI->getModule()->getContext(); // If we are calling a function that is noreturn, we must remove that // attribute. The code we insert here does expect it to return, after we // catch the exception. 
if (CI->doesNotReturn()) { - if (auto *F = dyn_cast<Function>(CI->getCalledValue())) + if (auto *F = CI->getCalledFunction()) F->removeFnAttr(Attribute::NoReturn); CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); } @@ -395,7 +412,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { SmallVector<Value *, 16> Args; // Put the pointer to the callee as first argument, so it can be called // within the invoke wrapper later - Args.push_back(CI->getCalledValue()); + Args.push_back(CI->getCalledOperand()); Args.append(CI->arg_begin(), CI->arg_end()); CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); NewCall->takeName(CI); @@ -443,18 +460,10 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { } // Get matching invoke wrapper based on callee signature -template <typename CallOrInvoke> -Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { +Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { Module *M = CI->getModule(); SmallVector<Type *, 16> ArgTys; - Value *Callee = CI->getCalledValue(); - FunctionType *CalleeFTy; - if (auto *F = dyn_cast<Function>(Callee)) - CalleeFTy = F->getFunctionType(); - else { - auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType(); - CalleeFTy = cast<FunctionType>(CalleeTy); - } + FunctionType *CalleeFTy = CI->getFunctionType(); std::string Sig = getSignature(CalleeFTy); if (InvokeWrappers.find(Sig) != InvokeWrappers.end()) @@ -467,8 +476,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, CalleeFTy->isVarArg()); - Function *F = - Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M); + Function *F = getEmscriptenFunction(FTy, "__invoke_" + Sig, M); InvokeWrappers[Sig] = F; return F; } @@ -538,13 +546,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M, // As output parameters. returns %label, %longjmp_result, and the BB the last // instruction (%longjmp_result = ...) is in. 
void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( - BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable, + BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, BasicBlock *&EndBB) { Function *F = BB->getParent(); LLVMContext &C = BB->getModule()->getContext(); IRBuilder<> IRB(C); - IRB.SetInsertPoint(InsertPt); + IRB.SetCurrentDebugLocation(DL); // if (%__THREW__.val != 0 & threwValue != 0) IRB.SetInsertPoint(BB); @@ -639,12 +647,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { // exception handling and setjmp/longjmp handling ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__"); ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue"); - GetTempRet0Func = - Function::Create(FunctionType::get(IRB.getInt32Ty(), false), - GlobalValue::ExternalLinkage, "getTempRet0", &M); - SetTempRet0Func = Function::Create( + GetTempRet0Func = getEmscriptenFunction( + FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M); + SetTempRet0Func = getEmscriptenFunction( FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false), - GlobalValue::ExternalLinkage, "setTempRet0", &M); + "setTempRet0", &M); GetTempRet0Func->setDoesNotThrow(); SetTempRet0Func->setDoesNotThrow(); @@ -655,14 +662,12 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { // Register __resumeException function FunctionType *ResumeFTy = FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); - ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, - "__resumeException", &M); + ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M); // Register llvm_eh_typeid_for function FunctionType *EHTypeIDTy = FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); - EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, - "llvm_eh_typeid_for", &M); + EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M); for (Function &F : M) { if (F.isDeclaration()) @@ -678,34 +683,30 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { if (LongjmpF) { // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is // defined in JS code - EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), - GlobalValue::ExternalLinkage, - "emscripten_longjmp_jmpbuf", &M); - + EmLongjmpJmpbufF = getEmscriptenFunction(LongjmpF->getFunctionType(), + "emscripten_longjmp_jmpbuf", &M); LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); } if (SetjmpF) { // Register saveSetjmp function FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); - SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0), - IRB.getInt32Ty(), Type::getInt32PtrTy(C), - IRB.getInt32Ty()}; FunctionType *FTy = - FunctionType::get(Type::getInt32PtrTy(C), Params, false); - SaveSetjmpF = - Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M); + FunctionType::get(Type::getInt32PtrTy(C), + {SetjmpFTy->getParamType(0), IRB.getInt32Ty(), + Type::getInt32PtrTy(C), IRB.getInt32Ty()}, + false); + SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M); // Register testSetjmp function - Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; - FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); - TestSetjmpF = - Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M); + FTy = FunctionType::get( + IRB.getInt32Ty(), + {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}, false); + TestSetjmpF = getEmscriptenFunction(FTy, 
"testSetjmp", &M); FTy = FunctionType::get(IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); - EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - "emscripten_longjmp", &M); + EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in every function @@ -744,17 +745,18 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { bool Changed = false; SmallVector<Instruction *, 64> ToErase; SmallPtrSet<LandingPadInst *, 32> LandingPads; - bool AllowExceptions = - areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName()); + bool AllowExceptions = areAllExceptionsAllowed() || + EHAllowlistSet.count(std::string(F.getName())); for (BasicBlock &BB : F) { auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); if (!II) continue; + Changed = true; LandingPads.insert(II->getLandingPadInst()); IRB.SetInsertPoint(II); - bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue()); + bool NeedInvoke = AllowExceptions && canThrow(II->getCalledOperand()); if (NeedInvoke) { // Wrap invoke with invoke wrapper and generate preamble/postamble Value *Threw = wrapInvoke(II); @@ -769,7 +771,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { // call+branch SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end()); CallInst *NewCall = - IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args); + IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setDebugLoc(II->getDebugLoc()); @@ -791,6 +793,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { auto *RI = dyn_cast<ResumeInst>(&I); if (!RI) continue; + Changed = true; // Split the input into legal values Value *Input = RI->getValue(); @@ -815,6 +818,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { continue; if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) continue; + Changed = true; IRB.SetInsertPoint(CI); CallInst *NewCI = @@ -830,7 +834,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { if (auto *LPI = dyn_cast<LandingPadInst>(I)) LandingPads.insert(LPI); } - Changed = !LandingPads.empty(); + Changed |= !LandingPads.empty(); // Handle all the landingpad for this function together, as multiple invokes // may share a single lp @@ -871,6 +875,27 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { return Changed; } +// This tries to get debug info from the instruction before which a new +// instruction will be inserted, and if there's no debug info in that +// instruction, tries to get the info instead from the previous instruction (if +// any). If none of these has debug info and a DISubprogram is provided, it +// creates a dummy debug info with the first line of the function, because IR +// verifier requires all inlinable callsites should have debug info when both a +// caller and callee have DISubprogram. If none of these conditions are met, +// returns empty info. 
+static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, + DISubprogram *SP) { + assert(InsertBefore); + if (InsertBefore->getDebugLoc()) + return InsertBefore->getDebugLoc(); + const Instruction *Prev = InsertBefore->getPrevNode(); + if (Prev && Prev->getDebugLoc()) + return Prev->getDebugLoc(); + if (SP) + return DILocation::get(SP->getContext(), SP->getLine(), 1, SP); + return DebugLoc(); +} + bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { Module &M = *F.getParent(); LLVMContext &C = F.getContext(); @@ -888,13 +913,22 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { // this instruction to a constant 4, because this value will be used in // SSAUpdater.AddAvailableValue(...) later. BasicBlock &EntryBB = F.getEntryBlock(); + DebugLoc FirstDL = getOrCreateDebugLoc(&*EntryBB.begin(), F.getSubprogram()); BinaryOperator *SetjmpTableSize = BinaryOperator::Create( Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize", &*EntryBB.getFirstInsertionPt()); + SetjmpTableSize->setDebugLoc(FirstDL); // setjmpTable = (int *) malloc(40); Instruction *SetjmpTable = CallInst::CreateMalloc( SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40), nullptr, nullptr, "setjmpTable"); + SetjmpTable->setDebugLoc(FirstDL); + // CallInst::CreateMalloc may return a bitcast instruction if the result types + // mismatch. We need to set the debug loc for the original call too. + auto *MallocCall = SetjmpTable->stripPointerCasts(); + if (auto *MallocCallI = dyn_cast<Instruction>(MallocCall)) { + MallocCallI->setDebugLoc(FirstDL); + } // setjmpTable[0] = 0; IRB.SetInsertPoint(SetjmpTableSize); IRB.CreateStore(IRB.getInt32(0), SetjmpTable); @@ -963,7 +997,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { if (!CI) continue; - const Value *Callee = CI->getCalledValue(); + const Value *Callee = CI->getCalledOperand(); if (!canLongjmp(M, Callee)) continue; if (isEmAsmCall(M, Callee)) @@ -1024,12 +1058,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { Value *Label = nullptr; Value *LongjmpResult = nullptr; BasicBlock *EndBB = nullptr; - wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label, - LongjmpResult, EndBB); + wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, SetjmpTable, SetjmpTableSize, + Label, LongjmpResult, EndBB); assert(Label && LongjmpResult && EndBB); // Create switch instruction IRB.SetInsertPoint(EndBB); + IRB.SetCurrentDebugLocation(EndBB->getInstList().back().getDebugLoc()); SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size()); // -1 means no longjmp happened, continue normally (will hit the default // switch case). 0 means a longjmp that is not ours to handle, needs a @@ -1053,8 +1088,17 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { // Free setjmpTable buffer before each return instruction for (BasicBlock &BB : F) { Instruction *TI = BB.getTerminator(); - if (isa<ReturnInst>(TI)) - CallInst::CreateFree(SetjmpTable, TI); + if (isa<ReturnInst>(TI)) { + DebugLoc DL = getOrCreateDebugLoc(TI, F.getSubprogram()); + auto *Free = CallInst::CreateFree(SetjmpTable, TI); + Free->setDebugLoc(DL); + // CallInst::CreateFree may create a bitcast instruction if its argument + // types mismatch. We need to set the debug loc for the bitcast too. 
+ if (auto *FreeCallI = dyn_cast<CallInst>(Free)) { + if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0))) + BitCastI->setDebugLoc(DL); + } + } } // Every call to saveSetjmp can change setjmpTable and setjmpTableSize diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp index 750b2233e67ab..9ccbee819c357 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp @@ -76,9 +76,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) { !ETy->getTypeAtIndex(2U)->isPointerTy()) return false; // Not (int, ptr, ptr). - // Collect the contents of @llvm.global_dtors, collated by priority and - // associated symbol. - std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs; + // Collect the contents of @llvm.global_dtors, ordered by priority. Within a + // priority, sequences of destructors with the same associated object are + // recorded so that we can register them as a group. + std::map< + uint16_t, + std::vector<std::pair<Constant *, std::vector<Constant *>>> + > DtorFuncs; for (Value *O : InitList->operands()) { auto *CS = dyn_cast<ConstantStruct>(O); if (!CS) @@ -96,7 +100,14 @@ bool LowerGlobalDtors::runOnModule(Module &M) { Constant *Associated = CS->getOperand(2); Associated = cast<Constant>(Associated->stripPointerCasts()); - DtorFuncs[PriorityValue][Associated].push_back(DtorFunc); + auto &AtThisPriority = DtorFuncs[PriorityValue]; + if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) { + std::vector<Constant *> NewList; + NewList.push_back(DtorFunc); + AtThisPriority.push_back(std::make_pair(Associated, NewList)); + } else { + AtThisPriority.back().second.push_back(DtorFunc); + } } if (DtorFuncs.empty()) return false; @@ -131,14 +142,19 @@ bool LowerGlobalDtors::runOnModule(Module &M) { // first function with __cxa_atexit. for (auto &PriorityAndMore : DtorFuncs) { uint16_t Priority = PriorityAndMore.first; - for (auto &AssociatedAndMore : PriorityAndMore.second) { + uint64_t Id = 0; + auto &AtThisPriority = PriorityAndMore.second; + for (auto &AssociatedAndMore : AtThisPriority) { Constant *Associated = AssociatedAndMore.first; + auto ThisId = Id++; Function *CallDtors = Function::Create( AtExitFuncTy, Function::PrivateLinkage, "call_dtors" + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) : Twine()), &M); @@ -146,7 +162,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) { FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false); - for (auto Dtor : AssociatedAndMore.second) + for (auto Dtor : reverse(AssociatedAndMore.second)) CallInst::Create(VoidVoid, Dtor, "", BB); ReturnInst::Create(C, BB); @@ -155,6 +171,8 @@ bool LowerGlobalDtors::runOnModule(Module &M) { "register_call_dtors" + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) : Twine()) + + (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId) + : Twine()) + (!Associated->isNullValue() ? 
(Twine(".") + Associated->getName()) : Twine()), &M); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 59c10243c545e..304dca2ebfe4a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -13,10 +13,11 @@ //===----------------------------------------------------------------------===// #include "WebAssemblyMCInstLower.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "TargetInfo/WebAssemblyTargetInfo.h" #include "WebAssemblyAsmPrinter.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRuntimeLibcallSignatures.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" @@ -29,11 +30,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to -// stack instructions -#define GET_INSTRMAP_INFO 1 -#include "WebAssemblyGenInstrInfo.inc" - // This disables the removal of registers when lowering into MC, as required // by some current tests. cl::opt<bool> @@ -56,7 +52,8 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { SmallVector<MVT, 1> ResultMVTs; SmallVector<MVT, 4> ParamMVTs; - computeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs); + const auto *const F = dyn_cast<Function>(Global); + computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs); auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); WasmSym->setSignature(Signature.get()); @@ -84,8 +81,9 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ - uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 - : wasm::WASM_TYPE_I32), + uint8_t(Subtarget.hasAddr64() && strcmp(Name, "__table_base") != 0 + ? wasm::WASM_TYPE_I64 + : wasm::WASM_TYPE_I32), Mutable}); return WasmSym; } @@ -208,6 +206,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, OutMI.setOpcode(MI->getOpcode()); const MCInstrDesc &Desc = MI->getDesc(); + unsigned NumVariadicDefs = MI->getNumExplicitDefs() - Desc.getNumDefs(); for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); @@ -229,9 +228,10 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, MCOp = MCOperand::createReg(WAReg); break; } - case MachineOperand::MO_Immediate: - if (I < Desc.NumOperands) { - const MCOperandInfo &Info = Desc.OpInfo[I]; + case MachineOperand::MO_Immediate: { + unsigned DescIndex = I - NumVariadicDefs; + if (DescIndex < Desc.NumOperands) { + const MCOperandInfo &Info = Desc.OpInfo[DescIndex]; if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { SmallVector<wasm::ValType, 4> Returns; SmallVector<wasm::ValType, 4> Params; @@ -270,6 +270,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, } MCOp = MCOperand::createImm(MO.getImm()); break; + } case MachineOperand::MO_FPImmediate: { // TODO: MC converts all floating point immediate operands to double. // This is fine for numeric values, but may cause NaNs to change bits. 
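The NaN caveat in that TODO is easy to demonstrate outside of LLVM. The following is a small standalone C++ sketch, not part of this patch, illustrating why widening a float immediate to double and narrowing it back is value-preserving for ordinary numbers but may not be bit-preserving for NaNs (the exact behavior is hardware-dependent; signaling NaNs are typically quieted by the conversion):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Float signaling NaN with payload 1 (exponent all ones, quiet bit clear).
  // Ordinary values survive float->double->float unchanged; NaN payload
  // handling is implementation-dependent.
  uint32_t In = 0x7f800001;
  float F;
  std::memcpy(&F, &In, sizeof(F));
  double D = F;                       // what MC effectively does to fp immediates
  float Back = static_cast<float>(D); // narrow back to float
  uint32_t Out;
  std::memcpy(&Out, &Back, sizeof(Out));
  // On common hardware this prints in=7f800001 out=7fc00001: the quiet bit
  // was set by the conversion and the original bit pattern is lost.
  std::printf("in=%08x out=%08x\n", In, Out);
  return 0;
}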
@@ -306,13 +307,15 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
 
   if (!WasmKeepRegisters)
     removeRegisterOperands(MI, OutMI);
+  else if (Desc.variadicOpsAreDefs())
+    OutMI.insert(OutMI.begin(), MCOperand::createImm(MI->getNumExplicitDefs()));
 }
 
 static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
   // Remove all uses of stackified registers to bring the instruction format
   // into its final stack form used throughout MC, and transition opcodes to
   // their _S variant.
-  // We do this seperate from the above code that still may need these
+  // We do this separately from the above code that still may need these
   // registers for e.g. call_indirect signatures.
   // See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
   // details.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index e4cc2389147bc..adee2f0553f97 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -16,14 +16,15 @@
 #include "WebAssemblyISelLowering.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
 
-void WebAssemblyFunctionInfo::initWARegs() {
+void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
   assert(WARegs.empty());
   unsigned Reg = UnusedReg;
-  WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
+  WARegs.resize(MRI.getNumVirtRegs(), Reg);
 }
 
 void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
@@ -42,15 +43,17 @@ void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
   }
 }
 
-void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
+void llvm::computeSignatureVTs(const FunctionType *Ty,
+                               const Function *TargetFunc,
+                               const Function &ContextFunc,
                                const TargetMachine &TM,
                                SmallVectorImpl<MVT> &Params,
                                SmallVectorImpl<MVT> &Results) {
-  computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+  computeLegalValueVTs(ContextFunc, TM, Ty->getReturnType(), Results);
 
   MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
   if (Results.size() > 1 &&
-      !TM.getSubtarget<WebAssemblySubtarget>(F).hasMultivalue()) {
+      !TM.getSubtarget<WebAssemblySubtarget>(ContextFunc).hasMultivalue()) {
     // WebAssembly can't lower returns of multiple values without demoting to
     // sret unless multivalue is enabled (see
     // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return
@@ -60,9 +63,28 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
   }
 
   for (auto *Param : Ty->params())
-    computeLegalValueVTs(F, TM, Param, Params);
+    computeLegalValueVTs(ContextFunc, TM, Param, Params);
   if (Ty->isVarArg())
     Params.push_back(PtrVT);
+
+  // For swiftcc, emit additional swiftself and swifterror parameters
+  // if they are not already present. These additional parameters are also
+  // passed by the caller; they are necessary to match the callee and caller
+  // signatures for indirect calls.
+ + if (TargetFunc && TargetFunc->getCallingConv() == CallingConv::Swift) { + MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()); + bool HasSwiftErrorArg = false; + bool HasSwiftSelfArg = false; + for (const auto &Arg : TargetFunc->args()) { + HasSwiftErrorArg |= Arg.hasAttribute(Attribute::SwiftError); + HasSwiftSelfArg |= Arg.hasAttribute(Attribute::SwiftSelf); + } + if (!HasSwiftErrorArg) + Params.push_back(PtrVT); + if (!HasSwiftSelfArg) + Params.push_back(PtrVT); + } } void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 16e2f4392984c..ca164fdd182cd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -30,8 +30,6 @@ struct WebAssemblyFunctionInfo; /// This class is derived from MachineFunctionInfo and contains private /// WebAssembly-specific information for each MachineFunction. class WebAssemblyFunctionInfo final : public MachineFunctionInfo { - MachineFunction &MF; - std::vector<MVT> Params; std::vector<MVT> Results; std::vector<MVT> Locals; @@ -55,12 +53,18 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { // A virtual register holding the base pointer for functions that have // overaligned values on the user stack. unsigned BasePtrVreg = -1U; + // A virtual register holding the frame base. This is either FP or SP + // after it has been replaced by a vreg + unsigned FrameBaseVreg = -1U; + // The local holding the frame base. This is either FP or SP + // after WebAssemblyExplicitLocals + unsigned FrameBaseLocal = -1U; // Function properties. bool CFGStackified = false; public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} + explicit WebAssemblyFunctionInfo(MachineFunction &MF) {} ~WebAssemblyFunctionInfo() override; void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI); @@ -90,12 +94,25 @@ public: assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set"); return BasePtrVreg; } + void setFrameBaseVreg(unsigned Reg) { FrameBaseVreg = Reg; } + unsigned getFrameBaseVreg() const { + assert(FrameBaseVreg != -1U && "Frame base vreg hasn't been set"); + return FrameBaseVreg; + } + void clearFrameBaseVreg() { FrameBaseVreg = -1U; } + // Return true if the frame base physreg has been replaced by a virtual reg. 
+  bool isFrameBaseVirtual() const { return FrameBaseVreg != -1U; }
+  void setFrameBaseLocal(unsigned Local) { FrameBaseLocal = Local; }
+  unsigned getFrameBaseLocal() const {
+    assert(FrameBaseLocal != -1U && "Frame base local hasn't been set");
+    return FrameBaseLocal;
+  }
   void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; }
 
   static const unsigned UnusedReg = -1u;
 
-  void stackifyVReg(unsigned VReg) {
-    assert(MF.getRegInfo().getUniqueVRegDef(VReg));
+  void stackifyVReg(MachineRegisterInfo &MRI, unsigned VReg) {
+    assert(MRI.getUniqueVRegDef(VReg));
     auto I = Register::virtReg2Index(VReg);
     if (I >= VRegStackified.size())
       VRegStackified.resize(I + 1);
@@ -113,7 +130,7 @@ public:
     return VRegStackified.test(I);
   }
 
-  void initWARegs();
+  void initWARegs(MachineRegisterInfo &MRI);
   void setWAReg(unsigned VReg, unsigned WAReg) {
     assert(WAReg != UnusedReg);
     auto I = Register::virtReg2Index(VReg);
@@ -140,9 +157,10 @@ void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
                           SmallVectorImpl<MVT> &ValueVTs);
 
 // Compute the signature for a given FunctionType (Ty). Note that it's not the
-// signature for F (F is just used to get varous context)
-void computeSignatureVTs(const FunctionType *Ty, const Function &F,
-                         const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
+// signature for ContextFunc (ContextFunc is just used to get various context)
+void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc,
+                         const Function &ContextFunc, const TargetMachine &TM,
+                         SmallVectorImpl<MVT> &Params,
                          SmallVectorImpl<MVT> &Results);
 
 void valTypesFromMVTs(const ArrayRef<MVT> &In,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index ac428fcc826a7..9aea65cba280c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -201,8 +201,7 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
     switch (MI.getOpcode()) {
     default:
       break;
-    case WebAssembly::CALL_i32:
-    case WebAssembly::CALL_i64:
+    case WebAssembly::CALL:
       Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
       break;
     }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 0bd30791e57cd..a2da0ea849e04 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -20,6 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -82,10 +83,22 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
   SmallVector<LiveInterval *, 4> SplitLIs;
   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
     unsigned Reg = Register::index2VirtReg(I);
+    auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+
     if (MRI.reg_nodbg_empty(Reg))
       continue;
 
     LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs);
+    if (Reg == TRI.getFrameRegister(MF) && SplitLIs.size() > 0) {
+      // The live interval for the frame register was split, resulting in a new
+      // VReg. For now we only support debug info output for a single frame base
+      // value for the function, so just use the last one.
It will certainly be + // wrong for some part of the function, but until we are able to track + // values through live-range splitting and stackification, it will have to + // do. + MF.getInfo<WebAssemblyFunctionInfo>()->setFrameBaseVreg( + SplitLIs.back()->reg); + } SplitLIs.clear(); } @@ -103,5 +116,5 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( } } - return false; + return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index 9b60596e42b43..96390de8f5e7d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -42,7 +42,7 @@ public: static char ID; OptimizeReturned() : FunctionPass(ID) {} - void visitCallSite(CallSite CS); + void visitCallBase(CallBase &CB); }; } // End anonymous namespace @@ -55,17 +55,16 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() { return new OptimizeReturned(); } -void OptimizeReturned::visitCallSite(CallSite CS) { - for (unsigned I = 0, E = CS.getNumArgOperands(); I < E; ++I) - if (CS.paramHasAttr(I, Attribute::Returned)) { - Instruction *Inst = CS.getInstruction(); - Value *Arg = CS.getArgOperand(I); +void OptimizeReturned::visitCallBase(CallBase &CB) { + for (unsigned I = 0, E = CB.getNumArgOperands(); I < E; ++I) + if (CB.paramHasAttr(I, Attribute::Returned)) { + Value *Arg = CB.getArgOperand(I); // Ignore constants, globals, undef, etc. if (isa<Constant>(Arg)) continue; // Like replaceDominatedUsesWith but using Instruction/Use dominance. - Arg->replaceUsesWithIf(Inst, - [&](Use &U) { return DT->dominates(Inst, U); }); + Arg->replaceUsesWithIf(&CB, + [&](Use &U) { return DT->dominates(&CB, U); }); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index ea6cd09a604c5..a587c9d23d2b7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -66,7 +66,7 @@ static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg, Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); } return Changed; } @@ -121,7 +121,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) .addReg(Reg); MO.setReg(NewReg); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); } } @@ -149,8 +149,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; - case WebAssembly::CALL_i32: - case WebAssembly::CALL_i64: { + case WebAssembly::CALL: { MachineOperand &Op1 = MI.getOperand(1); if (Op1.isSymbol()) { StringRef Name(Op1.getSymbolName()); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 043b6f1b7d18a..20fe2b2b7bfc5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -157,6 +157,9 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { Changed |= Old != New; UsedColors.set(Color); Assignments[Color].push_back(LI); + // If we reassigned the stack pointer, update the debug frame base info. 
+ if (Old != New && MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Old) + MFI.setFrameBaseVreg(New); LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) << " to vreg" << Register::virtReg2Index(New) << "\n"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index 72e7a7cf50425..b655014f4a90f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -66,7 +66,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - MFI.initWARegs(); + MFI.initWARegs(MRI); // WebAssembly argument registers are in the same index space as local // variables. Assign the numbers for them first. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 421d353a89e88..1d4e2e3a8f9e5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <iterator> using namespace llvm; #define DEBUG_TYPE "wasm-reg-stackify" @@ -120,6 +121,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::V128RegClass) { + // TODO: Replace this with v128.const 0 once that is supported in V8 Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); MI->addOperand(MachineOperand::CreateReg(TempReg, false)); @@ -135,12 +137,12 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, // Determine whether a call to the callee referenced by // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side // effects. -static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read, - bool &Write, bool &Effects, bool &StackPointer) { +static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write, + bool &Effects, bool &StackPointer) { // All calls can use the stack pointer. StackPointer = true; - const MachineOperand &MO = MI.getOperand(CalleeOpNo); + const MachineOperand &MO = WebAssembly::getCalleeOp(MI); if (MO.isGlobal()) { const Constant *GV = MO.getGlobal(); if (const auto *GA = dyn_cast<GlobalAlias>(GV)) @@ -246,14 +248,14 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, } // Check for writes to __stack_pointer global. - if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 && + if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 || + MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) && strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0) StackPointer = true; // Analyze calls. if (MI.isCall()) { - unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode()); - queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer); + queryCallee(MI, Read, Write, Effects, StackPointer); } } @@ -313,25 +315,59 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, // walking the block. // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. 
-static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
-                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
-  assert(Def->getParent() == Insert->getParent());
+static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
+                         const MachineInstr *Insert, AliasAnalysis &AA,
+                         const WebAssemblyFunctionInfo &MFI,
+                         const MachineRegisterInfo &MRI) {
+  const MachineInstr *DefI = Def->getParent();
+  const MachineInstr *UseI = Use->getParent();
+  assert(DefI->getParent() == Insert->getParent());
+  assert(UseI->getParent() == Insert->getParent());
+
+  // The first def of a multivalue instruction can be stackified by moving,
+  // since the later defs can always be placed into locals if necessary. Later
+  // defs can only be stackified if all previous defs are already stackified
+  // since ExplicitLocals will not know how to place a def in a local if a
+  // subsequent def is stackified. But only one def can be stackified by moving
+  // the instruction, so it must be the first one.
+  //
+  // TODO: This could be loosened to be the first *live* def, but care would
+  // have to be taken to ensure the drops of the initial dead defs can be
+  // placed. This would require checking that no previous defs are used in the
+  // same instruction as subsequent defs.
+  if (Def != DefI->defs().begin())
+    return false;
+
+  // If any subsequent def is used prior to the current value by the same
+  // instruction in which the current value is used, we cannot stackify.
+  // Stackifying in this case would require moving that def below the current
+  // def on the stack, which cannot be achieved, even with locals.
+  for (const auto &SubsequentDef : drop_begin(DefI->defs(), 1)) {
+    for (const auto &PriorUse : UseI->uses()) {
+      if (&PriorUse == Use)
+        break;
+      if (PriorUse.isReg() && SubsequentDef.getReg() == PriorUse.getReg())
+        return false;
+    }
+  }
+
+  // If moving is a semantic nop, it is always allowed
+  const MachineBasicBlock *MBB = DefI->getParent();
+  auto NextI = std::next(MachineBasicBlock::const_iterator(DefI));
+  for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
+    ;
+  if (NextI == Insert)
+    return true;
 
   // 'catch' and 'extract_exception' should be the first instruction of a BB and
   // cannot move.
-  if (Def->getOpcode() == WebAssembly::CATCH ||
-      Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
-    const MachineBasicBlock *MBB = Def->getParent();
-    auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
-    for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
-      ;
-    if (NextI != Insert)
-      return false;
-  }
+  if (DefI->getOpcode() == WebAssembly::CATCH ||
+      DefI->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32)
+    return false;
 
   // Check for register dependencies.
   SmallVector<unsigned, 4> MutableRegisters;
-  for (const MachineOperand &MO : Def->operands()) {
+  for (const MachineOperand &MO : DefI->operands()) {
     if (!MO.isReg() || MO.isUndef())
       continue;
     Register Reg = MO.getReg();
@@ -361,7 +397,7 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
   }
 
   bool Read = false, Write = false, Effects = false, StackPointer = false;
-  query(*Def, AA, Read, Write, Effects, StackPointer);
+  query(*DefI, AA, Read, Write, Effects, StackPointer);
 
   // If the instruction does not access memory and has no side effects, it has
   // no additional dependencies.
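Before the hunk below scans the intervening instructions, it helps to spell out the interference rule it applies. This is an illustrative, self-contained C++ sketch of the idea only; the struct and function names are invented here and do not appear in the patch:

#include <cstdio>

// Rough stand-in for the four flags that query() computes per instruction.
struct Effects {
  bool Read = false;         // may read memory
  bool Write = false;        // may write memory
  bool SideEffects = false;  // has unmodeled side effects
  bool StackPointer = false; // may touch the __stack_pointer global
};

// A def can move past an intervening instruction only if the two do not
// touch conflicting state: two reads commute, everything else conflicts.
static bool mayReorder(const Effects &Moving, const Effects &Between) {
  if (Moving.Write && (Between.Read || Between.Write))
    return false;
  if (Between.Write && Moving.Read)
    return false;
  if (Moving.SideEffects && Between.SideEffects)
    return false;
  if (Moving.StackPointer && Between.StackPointer)
    return false;
  return true;
}

int main() {
  Effects Load, Store;
  Load.Read = true;
  Store.Write = true;
  std::printf("load past store: %d\n", mayReorder(Load, Store)); // 0 (unsafe)
  std::printf("load past load:  %d\n", mayReorder(Load, Load));  // 1 (safe)
  return 0;
}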
@@ -369,8 +405,8 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters) return true; - // Scan through the intervening instructions between Def and Insert. - MachineBasicBlock::const_iterator D(Def), I(Insert); + // Scan through the intervening instructions between DefI and Insert. + MachineBasicBlock::const_iterator D(DefI), I(Insert); for (--I; I != D; --I) { bool InterveningRead = false; bool InterveningWrite = false; @@ -495,7 +531,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op, if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) { // No one else is using this register for anything so we can just stackify // it in place. - MFI.stackifyVReg(Reg); + MFI.stackifyVReg(MRI, Reg); } else { // The register may have unrelated uses or defs; create a new register for // just our one def and use so that we can stackify it. @@ -512,7 +548,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op, LIS.getInstructionIndex(*Op.getParent()).getRegSlot(), /*RemoveDeadValNo=*/true); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); DefDIs.updateReg(NewReg); @@ -541,7 +577,7 @@ static MachineInstr *rematerializeCheapDef( MachineInstr *Clone = &*std::prev(Insert); LIS.InsertMachineInstrInMaps(*Clone); LIS.createAndComputeVirtRegInterval(NewReg); - MFI.stackifyVReg(NewReg); + MFI.stackifyVReg(MRI, NewReg); imposeStackOrdering(Clone); LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump()); @@ -632,8 +668,8 @@ static MachineInstr *moveAndTeeForMultiUse( // Finish stackifying the new regs. LIS.createAndComputeVirtRegInterval(TeeReg); LIS.createAndComputeVirtRegInterval(DefReg); - MFI.stackifyVReg(DefReg); - MFI.stackifyVReg(TeeReg); + MFI.stackifyVReg(MRI, DefReg); + MFI.stackifyVReg(MRI, TeeReg); imposeStackOrdering(Def); imposeStackOrdering(Tee); @@ -801,32 +837,32 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { CommutingState Commuting; TreeWalkerState TreeWalker(Insert); while (!TreeWalker.done()) { - MachineOperand &Op = TreeWalker.pop(); + MachineOperand &Use = TreeWalker.pop(); // We're only interested in explicit virtual register operands. - if (!Op.isReg()) + if (!Use.isReg()) continue; - Register Reg = Op.getReg(); - assert(Op.isUse() && "explicit_uses() should only iterate over uses"); - assert(!Op.isImplicit() && + Register Reg = Use.getReg(); + assert(Use.isUse() && "explicit_uses() should only iterate over uses"); + assert(!Use.isImplicit() && "explicit_uses() should only iterate over explicit operands"); if (Register::isPhysicalRegister(Reg)) continue; // Identify the definition for this register at this point. - MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS); - if (!Def) + MachineInstr *DefI = getVRegDef(Reg, Insert, MRI, LIS); + if (!DefI) continue; // Don't nest an INLINE_ASM def into anything, because we don't have // constraints for $pop outputs. - if (Def->isInlineAsm()) + if (DefI->isInlineAsm()) continue; // Argument instructions represent live-in registers and not real // instructions. - if (WebAssembly::isArgument(Def->getOpcode())) + if (WebAssembly::isArgument(DefI->getOpcode())) continue; // Currently catch's return value register cannot be stackified, because @@ -843,28 +879,38 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // register should be assigned to a local to be propagated across // 'block' boundary now. 
// - // TODO Fix this once we support the multi-value proposal. - if (Def->getOpcode() == WebAssembly::CATCH) + // TODO: Fix this once we support multivalue blocks. + if (DefI->getOpcode() == WebAssembly::CATCH) continue; + MachineOperand *Def = DefI->findRegisterDefOperand(Reg); + assert(Def != nullptr); + // Decide which strategy to take. Prefer to move a single-use value // over cloning it, and prefer cloning over introducing a tee. // For moving, we require the def to be in the same block as the use; // this makes things simpler (LiveIntervals' handleMove function only // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. - bool SameBlock = Def->getParent() == &MBB; - bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) && + bool SameBlock = DefI->getParent() == &MBB; + bool CanMove = SameBlock && + isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) && !TreeWalker.isOnStack(Reg); - if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) { - Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI); - } else if (shouldRematerialize(*Def, AA, TII)) { + if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) { + Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); + + // If we are removing the frame base reg completely, remove the debug + // info as well. + // TODO: Encode this properly as a stackified value. + if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) + MFI.clearFrameBaseVreg(); + } else if (shouldRematerialize(*DefI, AA, TII)) { Insert = - rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(), + rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); - } else if (CanMove && - oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) { - Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, + } else if (CanMove && oneUseDominatesOtherUses(Reg, Use, MBB, MRI, MDT, + LIS, MFI)) { + Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI, TII); } else { // We failed to stackify the operand. If the problem was ordering @@ -875,6 +921,25 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; } + // Stackifying a multivalue def may unlock in-place stackification of + // subsequent defs. TODO: Handle the case where the consecutive uses are + // not all in the same instruction. + auto *SubsequentDef = Insert->defs().begin(); + auto *SubsequentUse = &Use; + while (SubsequentDef != Insert->defs().end() && + SubsequentUse != Use.getParent()->uses().end()) { + if (!SubsequentDef->isReg() || !SubsequentUse->isReg()) + break; + unsigned DefReg = SubsequentDef->getReg(); + unsigned UseReg = SubsequentUse->getReg(); + // TODO: This single-use restriction could be relaxed by using tees. + if (DefReg != UseReg || !MRI.hasOneUse(DefReg)) + break; + MFI.stackifyVReg(MRI, DefReg); + ++SubsequentDef; + ++SubsequentUse; + } + // If the instruction we just stackified is an IMPLICIT_DEF, convert it // to a constant 0 so that the def is explicit, and the push/pop // correspondence is maintained.
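The while-loop added at the end of this hunk walks the producer's remaining defs and the consumer's remaining uses in lockstep: each pair is stackified in place as long as the registers line up and the def has exactly one use, and the walk stops at the first mismatch, leaving the rest to ExplicitLocals. A standalone sketch of that pairing, under the simplifying assumption that the just-moved use was the consumer's first operand (toy containers, not the MachineOperand iterators):

#include <cstddef>
#include <set>
#include <vector>

// Returns how many additional defs were stackified in place. MultiUseRegs
// models MRI.hasOneUse() returning false; Stackified models MFI.stackifyVReg.
std::size_t stackifySubsequentDefs(const std::vector<unsigned> &ProducerDefs,
                                   const std::vector<unsigned> &ConsumerUses,
                                   const std::set<unsigned> &MultiUseRegs,
                                   std::set<unsigned> &Stackified) {
  std::size_t N = 0;
  // Pair 0 was handled by moving the producer itself.
  for (std::size_t I = 1;
       I < ProducerDefs.size() && I < ConsumerUses.size(); ++I) {
    unsigned DefReg = ProducerDefs[I];
    // Values pop in push order, so defs must match uses positionally, and
    // a multi-use value would need a tee, which is not handled here.
    if (DefReg != ConsumerUses[I] || MultiUseRegs.count(DefReg))
      break;
    Stackified.insert(DefReg);
    ++N;
  }
  return N;
}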
@@ -912,18 +977,20 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { for (MachineInstr &MI : MBB) { if (MI.isDebugInstr()) continue; - for (MachineOperand &MO : reverse(MI.explicit_operands())) { + for (MachineOperand &MO : reverse(MI.explicit_uses())) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - - if (MFI.isVRegStackified(Reg)) { - if (MO.isDef()) - Stack.push_back(Reg); - else - assert(Stack.pop_back_val() == Reg && - "Register stack pop should be paired with a push"); - } + if (MFI.isVRegStackified(Reg)) + assert(Stack.pop_back_val() == Reg && + "Register stack pop should be paired with a push"); + } + for (MachineOperand &MO : MI.defs()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MFI.isVRegStackified(Reg)) + Stack.push_back(MO.getReg()); } } // TODO: Generalize this code to support keeping values on the stack across diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 789a025794ea0..130589c9df8c5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -88,16 +88,17 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( // If this is an address being added to a constant, fold the frame offset // into the constant. - if (MI.getOpcode() == WebAssembly::ADD_I32) { + if (MI.getOpcode() == WebAssemblyFrameLowering::getOpcAdd(MF)) { MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); if (OtherMO.isReg()) { Register OtherMOReg = OtherMO.getReg(); if (Register::isVirtualRegister(OtherMOReg)) { MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); // TODO: For now we just opportunistically do this in the case where - // the CONST_I32 happens to have exactly one def and one use. We + // the CONST_I32/64 happens to have exactly one def and one use. We // should generalize this to optimize in more cases. - if (Def && Def->getOpcode() == WebAssembly::CONST_I32 && + if (Def && Def->getOpcode() == + WebAssemblyFrameLowering::getOpcConst(MF) && MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) { MachineOperand &ImmMO = Def->getOperand(1); ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); @@ -109,20 +110,22 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( } } - // Otherwise create an i32.add SP, offset and make it the operand. + // Otherwise create an i32/64.add SP, offset and make it the operand. const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); unsigned FIRegOperand = FrameRegister; if (FrameOffset) { - // Create i32.add SP, offset and make it the operand. + // Create i32/64.add SP, offset and make it the operand. const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); Register OffsetOp = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + BuildMI(MBB, *II, II->getDebugLoc(), + TII->get(WebAssemblyFrameLowering::getOpcConst(MF)), OffsetOp) .addImm(FrameOffset); FIRegOperand = MRI.createVirtualRegister(PtrRC); - BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), + BuildMI(MBB, *II, II->getDebugLoc(), + TII->get(WebAssemblyFrameLowering::getOpcAdd(MF)), FIRegOperand) .addReg(FrameRegister) .addReg(OffsetOp); @@ -132,6 +135,10 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( Register WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + // If the PReg has been replaced by a VReg, return that. 
+ const auto &MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + if (MFI->isFrameBaseVirtual()) + return MFI->getFrameBaseVreg(); static const unsigned Regs[2][2] = { /* !isArch64Bit isArch64Bit */ /* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64}, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 5eafd6c54e782..9f5d6b2a9a47b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -67,7 +67,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { }); MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); bool Changed = false; assert(!mustPreserveAnalysisID(LiveIntervalsID) && @@ -88,8 +88,18 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) { MachineOperand &MO = *I++; if (!MO.isImplicit()) { - if (VReg == WebAssembly::NoRegister) + if (VReg == WebAssembly::NoRegister) { VReg = MRI.createVirtualRegister(RC); + if (PReg == TRI.getFrameRegister(MF)) { + auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); + assert(!FI->isFrameBaseVirtual()); + FI->setFrameBaseVreg(VReg); + LLVM_DEBUG({ + dbgs() << "replacing preg " << PReg << " with " << VReg << " (" + << Register::virtReg2Index(VReg) << ")\n"; + }); + } + } MO.setReg(VReg); if (MO.getParent()->isDebugValue()) MO.setIsDebug(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index c6cf7b6bc551a..6456026f4ba74 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -82,6 +82,7 @@ enum RuntimeLibcallSignature { func_iPTR_i32, func_iPTR_i64, func_iPTR_i64_i64, + func_iPTR_i64_i64_i32, func_iPTR_i64_i64_i64_i64, func_iPTR_i64_i64_i64_i64_i64_i64, i32_func_i64_i64, @@ -173,10 +174,13 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64; Table[RTLIB::POWI_F32] = f32_func_f32_i32; Table[RTLIB::POWI_F64] = f64_func_f64_i32; - Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i32; Table[RTLIB::SQRT_F32] = f32_func_f32; Table[RTLIB::SQRT_F64] = f64_func_f64; Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64; + Table[RTLIB::CBRT_F32] = f32_func_f32; + Table[RTLIB::CBRT_F64] = f64_func_f64; + Table[RTLIB::CBRT_F128] = func_iPTR_i64_i64; Table[RTLIB::LOG_F32] = f32_func_f32; Table[RTLIB::LOG_F64] = f64_func_f64; Table[RTLIB::LOG_F128] = func_iPTR_i64_i64; @@ -829,6 +833,12 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); break; + case func_iPTR_i64_i64_i32: + Params.push_back(PtrTy); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I32); + break; case func_iPTR_i64_i64_i64_i64: Params.push_back(PtrTy); Params.push_back(wasm::ValType::I64); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index 890e4b8e4e2a4..16e05150c64ef 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -20,40 +20,40 @@ WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, + SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - if (!DAG.getMachineFunction() - .getSubtarget<WebAssemblySubtarget>() - .hasBulkMemory()) + auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>(); + if (!ST.hasBulkMemory()) return SDValue(); SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + auto LenMVT = ST.hasAddr64() ? MVT::i64 : MVT::i32; return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, {Chain, MemIdx, MemIdx, Dst, Src, - DAG.getZExtOrTrunc(Size, DL, MVT::i32)}); + DAG.getZExtOrTrunc(Size, DL, LenMVT)}); } SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool IsVolatile, + SDValue Op3, Align Alignment, bool IsVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align, - IsVolatile, false, DstPtrInfo, - SrcPtrInfo); + return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, + Alignment, IsVolatile, false, + DstPtrInfo, SrcPtrInfo); } SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val, - SDValue Size, unsigned Align, bool IsVolatile, + SDValue Size, Align Alignment, bool IsVolatile, MachinePointerInfo DstPtrInfo) const { - if (!DAG.getMachineFunction() - .getSubtarget<WebAssemblySubtarget>() - .hasBulkMemory()) + auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>(); + if (!ST.hasBulkMemory()) return SDValue(); SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + auto LenMVT = ST.hasAddr64() ? 
MVT::i64 : MVT::i32; // Only low byte matters for val argument, so anyext the i8 return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx, Dst, DAG.getAnyExtOrTrunc(Val, DL, MVT::i32), - DAG.getZExtOrTrunc(Size, DL, MVT::i32)); + DAG.getZExtOrTrunc(Size, DL, LenMVT)); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 0b90ece27dff3..f4d2132fd3af2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -24,18 +24,19 @@ public: ~WebAssemblySelectionDAGInfo() override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, + SDValue Op3, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; - SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, - SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Op1, SDValue Op2, SDValue Op3, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool IsVolatile, + SDValue Op3, Align Alignment, bool IsVolatile, MachinePointerInfo DstPtrInfo) const override; }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index a249ccf176386..89ae45722e429 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -65,7 +65,7 @@ static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo) { assert(MI.getDesc().OpInfo[OperandNo].OperandType == WebAssembly::OPERAND_P2ALIGN && "Load and store instructions should have a p2align operand"); - uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment()); + uint64_t P2Align = Log2((*MI.memoperands_begin())->getAlign()); // WebAssembly does not currently support supernatural alignment. 
P2Align = std::min(P2Align, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 196a74565285e..cacf5ab078a01 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -25,13 +25,15 @@ using namespace llvm; #include "WebAssemblyGenSubtargetInfo.inc" WebAssemblySubtarget & -WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) { +WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { // Determine default and user-specified characteristics + LLVM_DEBUG(llvm::dbgs() << "initializeSubtargetDependencies\n"); - if (CPUString.empty()) - CPUString = "generic"; + if (CPU.empty()) + CPU = "generic"; - ParseSubtargetFeatures(CPUString, FS); + ParseSubtargetFeatures(CPU, FS); return *this; } @@ -39,10 +41,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) - : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), - TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), - TLInfo(TM, *this) {} + : WebAssemblyGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + FrameLowering(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TSInfo(), TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableAtomicExpand() const { // If atomics are disabled, atomic ops are lowered instead of expanded diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 8db2120f9834e..8b95a3ddb8373 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -47,9 +47,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasMultivalue = false; bool HasMutableGlobals = false; bool HasTailCall = false; - - /// String name of used CPU. - std::string CPUString; + bool HasReferenceTypes = false; /// What processor and OS we're targeting. Triple TargetTriple; @@ -59,9 +57,8 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { WebAssemblySelectionDAGInfo TSInfo; WebAssemblyTargetLowering TLInfo; - /// Initializes using CPUString and the passed in feature string so that we - /// can use initializer lists for subtarget initialization. - WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS); + WebAssemblySubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef FS); public: /// This constructor initializes the data members to match that @@ -104,6 +101,7 @@ public: bool hasMultivalue() const { return HasMultivalue; } bool hasMutableGlobals() const { return HasMutableGlobals; } bool hasTailCall() const { return HasTailCall; } + bool hasReferenceTypes() const { return HasReferenceTypes; } /// Parses features string setting specified subtarget options. Definition of /// function is auto generated by tblgen. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 4291b48c16bee..7bf655c925a45 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -45,6 +45,16 @@ static cl::opt<bool> EnableEmSjLj( cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), cl::init(false)); +// A command-line option to keep implicit locals +// for the purpose of testing with lit/llc ONLY. 
+// This produces output which is not valid WebAssembly, and is not supported +// by assemblers/disassemblers and other MC based tools. +static cl::opt<bool> WasmDisableExplicitLocals( + "wasm-disable-explicit-locals", cl::Hidden, + cl::desc("WebAssembly: output implicit locals in" + " instruction output for test purposes only."), + cl::init(false)); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { // Register the target. RegisterTargetMachine<WebAssemblyTargetMachine> X( @@ -75,8 +85,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { initializeWebAssemblyExplicitLocalsPass(PR); initializeWebAssemblyLowerBrUnlessPass(PR); initializeWebAssemblyRegNumberingPass(PR); + initializeWebAssemblyDebugFixupPass(PR); initializeWebAssemblyPeepholePass(PR); - initializeWebAssemblyCallIndirectFixupPass(PR); } //===----------------------------------------------------------------------===// @@ -210,8 +220,8 @@ private: FeatureBitset coalesceFeatures(const Module &M) { FeatureBitset Features = WasmTM - ->getSubtargetImpl(WasmTM->getTargetCPU(), - WasmTM->getTargetFeatureString()) + ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), + std::string(WasmTM->getTargetFeatureString())) ->getFeatureBits(); for (auto &F : M) Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); @@ -274,21 +284,22 @@ private: void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { - std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); - if (KV.Value == WebAssembly::FeatureAtomics && Stripped) { - // "atomics" is special: code compiled without atomics may have had its - // atomics lowered to nonatomic operations. In that case, atomics is - // disallowed to prevent unsafe linking with atomics-enabled objects. - assert(!Features[WebAssembly::FeatureAtomics] || - !Features[WebAssembly::FeatureBulkMemory]); - M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, - wasm::WASM_FEATURE_PREFIX_DISALLOWED); - } else if (Features[KV.Value]) { - // Otherwise features are marked Used or not mentioned + if (Features[KV.Value]) { + // Mark features as used + std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, wasm::WASM_FEATURE_PREFIX_USED); } } + // Code compiled without atomics or bulk-memory may have had its atomics or + // thread-local data lowered to nonatomic operations or non-thread-local + // data. In that case, we mark the pseudo-feature "shared-mem" as disallowed + // to tell the linker that it would be unsafe to allow this code to be used + // in a module with shared memory. + if (Stripped) { + M.addModuleFlag(Module::ModFlagBehavior::Error, "wasm-feature-shared-mem", + wasm::WASM_FEATURE_PREFIX_DISALLOWED); + } } }; char CoalesceFeaturesAndStripAtomics::ID = 0; @@ -395,6 +406,10 @@ bool WebAssemblyPassConfig::addInstSelector() { // it's inconvenient to collect. Collect it now, and update the immediate // operands. addPass(createWebAssemblySetP2AlignOperands()); + + // Eliminate range checks and add default targets to br_table instructions. + addPass(createWebAssemblyFixBrTableDefaults()); + return false; } @@ -423,11 +438,6 @@ void WebAssemblyPassConfig::addPostRegAlloc() { void WebAssemblyPassConfig::addPreEmitPass() { TargetPassConfig::addPreEmitPass(); - // Rewrite pseudo call_indirect instructions as real instructions.
- // This needs to run before register stackification, because we change the - // order of the arguments. - addPass(createWebAssemblyCallIndirectFixup()); - // Eliminate multiple-entry loops. addPass(createWebAssemblyFixIrreducibleControlFlow()); @@ -472,7 +482,8 @@ void WebAssemblyPassConfig::addPreEmitPass() { addPass(createWebAssemblyCFGStackify()); // Insert explicit local.get and local.set operators. - addPass(createWebAssemblyExplicitLocals()); + if (!WasmDisableExplicitLocals) + addPass(createWebAssemblyExplicitLocals()); // Lower br_unless into br_if. addPass(createWebAssemblyLowerBrUnless()); @@ -483,6 +494,10 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Create a mapping from LLVM CodeGen virtual registers to wasm registers. addPass(createWebAssemblyRegNumbering()); + + // Fix debug_values whose defs have been stackified. + if (!WasmDisableExplicitLocals) + addPass(createWebAssemblyDebugFixup()); } yaml::MachineFunctionInfo * diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 850e6b9a9e9e0..dd5b39773313e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -47,7 +47,7 @@ public: TargetTransformInfo getTargetTransformInfo(const Function &F) override; - bool usesPhysRegsForPEI() const override { return false; } + bool usesPhysRegsForValues() const override { return false; } yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index ac8ad927d334d..28703a2787e0d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -44,13 +44,14 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const { } unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) { unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); if (auto *VTy = dyn_cast<VectorType>(Ty)) { switch (Opcode) { @@ -62,10 +63,11 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( // approximation.
if (Opd2Info != TTI::OK_UniformValue && Opd2Info != TTI::OK_UniformConstantValue) - Cost = VTy->getNumElements() * - (TargetTransformInfo::TCC_Basic + - getArithmeticInstrCost(Opcode, VTy->getElementType()) + - TargetTransformInfo::TCC_Basic); + Cost = + cast<FixedVectorType>(VTy)->getNumElements() * + (TargetTransformInfo::TCC_Basic + + getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) + + TargetTransformInfo::TCC_Basic); break; } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 2731dda10becc..79588a9f56698 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -57,6 +57,7 @@ public: unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index a237da8154ab7..bc2bb4fd69352 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -49,7 +49,7 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { if (!MI.isCall()) return false; - const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode())); + const MachineOperand &MO = getCalleeOp(MI); assert(MO.isGlobal() || MO.isSymbol()); if (MO.isSymbol()) { @@ -79,3 +79,20 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { // original LLVM IR? (Even when the callee may throw) return true; } + +const MachineOperand &WebAssembly::getCalleeOp(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case WebAssembly::CALL: + case WebAssembly::CALL_S: + case WebAssembly::RET_CALL: + case WebAssembly::RET_CALL_S: + return MI.getOperand(MI.getNumExplicitDefs()); + case WebAssembly::CALL_INDIRECT: + case WebAssembly::CALL_INDIRECT_S: + case WebAssembly::RET_CALL_INDIRECT: + case WebAssembly::RET_CALL_INDIRECT_S: + return MI.getOperand(MI.getNumOperands() - 1); + default: + llvm_unreachable("Not a call instruction"); + } +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 26cf84de89b92..4f0ed43a24816 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -44,6 +44,10 @@ template <typename T> MachineBasicBlock *getBottom(const T *Unit) { return Bottom; } +/// Returns the callee operand, assuming the given instruction is a call +/// instruction. +const MachineOperand &getCalleeOp(const MachineInstr &MI); + } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index 701b347bcbd76..c9f7574b9a41b 100644 --- a/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -86,7 +86,6 @@ lifetime2.C # violates C++ DR1696 # WASI doesn't have stdjmp.h yet pr56982.c -simd-2.C # WASI doesn't have pthread.h yet thread_local3.C
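One convention from the getCalleeOp hunk above is worth calling out: direct calls keep the callee operand immediately after their explicit defs, while indirect calls keep it as the last operand, after the argument list. A toy model of that placement rule, with hypothetical types standing in for the MC layer:

#include <cstddef>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for a call MachineInstr.
struct ToyCall {
  bool Indirect;
  std::size_t NumExplicitDefs;
  std::vector<int> Operands; // defs first, then the callee and arguments
};

// Index of the callee operand, mirroring the switch in getCalleeOp.
std::size_t calleeOpIndex(const ToyCall &MI) {
  if (MI.Operands.empty())
    throw std::invalid_argument("not a call instruction");
  // Indirect calls: the callee (function pointer) is the last operand.
  // Direct calls: the callee follows the explicit defs.
  return MI.Indirect ? MI.Operands.size() - 1 : MI.NumExplicitDefs;
}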