Diffstat (limited to 'llvm/lib/Target/WebAssembly')
90 files changed, 22496 insertions, 0 deletions
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp new file mode 100644 index 000000000000..53a96fd6a97d --- /dev/null +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -0,0 +1,873 @@ +//==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file is part of the WebAssembly Assembler. +/// +/// It contains code to translate a parsed .s file into MCInsts. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" +#include "TargetInfo/WebAssemblyTargetInfo.h" +#include "WebAssembly.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-asm-parser" + +namespace { + +/// WebAssemblyOperand - Instances of this class represent the operands in a +/// parsed WASM machine instruction. +struct WebAssemblyOperand : public MCParsedAsmOperand { + enum KindTy { Token, Integer, Float, Symbol, BrList } Kind; + + SMLoc StartLoc, EndLoc; + + struct TokOp { + StringRef Tok; + }; + + struct IntOp { + int64_t Val; + }; + + struct FltOp { + double Val; + }; + + struct SymOp { + const MCExpr *Exp; + }; + + struct BrLOp { + std::vector<unsigned> List; + }; + + union { + struct TokOp Tok; + struct IntOp Int; + struct FltOp Flt; + struct SymOp Sym; + struct BrLOp BrL; + }; + + WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T) + : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {} + WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I) + : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {} + WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F) + : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {} + WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S) + : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {} + WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End) + : Kind(K), StartLoc(Start), EndLoc(End), BrL() {} + + ~WebAssemblyOperand() { + if (isBrList()) + BrL.~BrLOp(); + } + + bool isToken() const override { return Kind == Token; } + bool isImm() const override { return Kind == Integer || Kind == Symbol; } + bool isFPImm() const { return Kind == Float; } + bool isMem() const override { return false; } + bool isReg() const override { return false; } + bool isBrList() const { return Kind == BrList; } + + unsigned getReg() const override { + llvm_unreachable("Assembly inspects a register operand"); + return 0; + } + + StringRef getToken() const { + assert(isToken()); + return Tok.Tok; + } + + SMLoc getStartLoc() const override { return StartLoc; } + SMLoc getEndLoc() const override { return EndLoc; } + + void addRegOperands(MCInst &, unsigned) const { 
+ // Required by the assembly matcher. + llvm_unreachable("Assembly matcher creates register operands"); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Integer) + Inst.addOperand(MCOperand::createImm(Int.Val)); + else if (Kind == Symbol) + Inst.addOperand(MCOperand::createExpr(Sym.Exp)); + else + llvm_unreachable("Should be integer immediate or symbol!"); + } + + void addFPImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Float) + Inst.addOperand(MCOperand::createFPImm(Flt.Val)); + else + llvm_unreachable("Should be float immediate!"); + } + + void addBrListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && isBrList() && "Invalid BrList!"); + for (auto Br : BrL.List) + Inst.addOperand(MCOperand::createImm(Br)); + } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case Token: + OS << "Tok:" << Tok.Tok; + break; + case Integer: + OS << "Int:" << Int.Val; + break; + case Float: + OS << "Flt:" << Flt.Val; + break; + case Symbol: + OS << "Sym:" << Sym.Exp; + break; + case BrList: + OS << "BrList:" << BrL.List.size(); + break; + } + } +}; + +class WebAssemblyAsmParser final : public MCTargetAsmParser { + MCAsmParser &Parser; + MCAsmLexer &Lexer; + + // Much like WebAssemblyAsmPrinter in the backend, we have to own these. + std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures; + + // Order of labels, directives and instructions in a .s file have no + // syntactical enforcement. This class is a callback from the actual parser, + // and yet we have to be feeding data to the streamer in a very particular + // order to ensure a correct binary encoding that matches the regular backend + // (the streamer does not enforce this). This "state machine" enum helps + // guarantee that correct order. + enum ParserState { + FileStart, + Label, + FunctionStart, + FunctionLocals, + Instructions, + EndFunction, + DataSection, + } CurrentState = FileStart; + + // For ensuring blocks are properly nested. + enum NestingType { + Function, + Block, + Loop, + Try, + If, + Else, + Undefined, + }; + std::vector<NestingType> NestingStack; + + // We track this to see if a .functype following a label is the same, + // as this is how we recognize the start of a function. + MCSymbol *LastLabel = nullptr; + MCSymbol *LastFunctionLabel = nullptr; + +public: + WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI, MII), Parser(Parser), + Lexer(Parser.getLexer()) { + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + +#define GET_ASSEMBLER_HEADER +#include "WebAssemblyGenAsmMatcher.inc" + + // TODO: This is required to be implemented, but appears unused. 
+ bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/, + SMLoc & /*EndLoc*/) override { + llvm_unreachable("ParseRegister is not implemented."); + } + + bool error(const Twine &Msg, const AsmToken &Tok) { + return Parser.Error(Tok.getLoc(), Msg + Tok.getString()); + } + + bool error(const Twine &Msg) { + return Parser.Error(Lexer.getTok().getLoc(), Msg); + } + + void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) { + Signatures.push_back(std::move(Sig)); + } + + std::pair<StringRef, StringRef> nestingString(NestingType NT) { + switch (NT) { + case Function: + return {"function", "end_function"}; + case Block: + return {"block", "end_block"}; + case Loop: + return {"loop", "end_loop"}; + case Try: + return {"try", "end_try"}; + case If: + return {"if", "end_if"}; + case Else: + return {"else", "end_if"}; + default: + llvm_unreachable("unknown NestingType"); + } + } + + void push(NestingType NT) { NestingStack.push_back(NT); } + + bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) { + if (NestingStack.empty()) + return error(Twine("End of block construct with no start: ") + Ins); + auto Top = NestingStack.back(); + if (Top != NT1 && Top != NT2) + return error(Twine("Block construct type mismatch, expected: ") + + nestingString(Top).second + ", instead got: " + Ins); + NestingStack.pop_back(); + return false; + } + + bool ensureEmptyNestingStack() { + auto Err = !NestingStack.empty(); + while (!NestingStack.empty()) { + error(Twine("Unmatched block construct(s) at function end: ") + + nestingString(NestingStack.back()).first); + NestingStack.pop_back(); + } + return Err; + } + + bool isNext(AsmToken::TokenKind Kind) { + auto Ok = Lexer.is(Kind); + if (Ok) + Parser.Lex(); + return Ok; + } + + bool expect(AsmToken::TokenKind Kind, const char *KindName) { + if (!isNext(Kind)) + return error(std::string("Expected ") + KindName + ", instead got: ", + Lexer.getTok()); + return false; + } + + StringRef expectIdent() { + if (!Lexer.is(AsmToken::Identifier)) { + error("Expected identifier, got: ", Lexer.getTok()); + return StringRef(); + } + auto Name = Lexer.getTok().getString(); + Parser.Lex(); + return Name; + } + + Optional<wasm::ValType> parseType(const StringRef &Type) { + // FIXME: can't use StringSwitch because wasm::ValType doesn't have a + // "invalid" value. 
+ if (Type == "i32") + return wasm::ValType::I32; + if (Type == "i64") + return wasm::ValType::I64; + if (Type == "f32") + return wasm::ValType::F32; + if (Type == "f64") + return wasm::ValType::F64; + if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || + Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || + Type == "f64x2") + return wasm::ValType::V128; + if (Type == "exnref") + return wasm::ValType::EXNREF; + return Optional<wasm::ValType>(); + } + + WebAssembly::BlockType parseBlockType(StringRef ID) { + // Multivalue block types are handled separately in parseSignature + return StringSwitch<WebAssembly::BlockType>(ID) + .Case("i32", WebAssembly::BlockType::I32) + .Case("i64", WebAssembly::BlockType::I64) + .Case("f32", WebAssembly::BlockType::F32) + .Case("f64", WebAssembly::BlockType::F64) + .Case("v128", WebAssembly::BlockType::V128) + .Case("exnref", WebAssembly::BlockType::Exnref) + .Case("void", WebAssembly::BlockType::Void) + .Default(WebAssembly::BlockType::Invalid); + } + + bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) { + while (Lexer.is(AsmToken::Identifier)) { + auto Type = parseType(Lexer.getTok().getString()); + if (!Type) + return error("unknown type: ", Lexer.getTok()); + Types.push_back(Type.getValue()); + Parser.Lex(); + if (!isNext(AsmToken::Comma)) + break; + } + return false; + } + + void parseSingleInteger(bool IsNegative, OperandVector &Operands) { + auto &Int = Lexer.getTok(); + int64_t Val = Int.getIntVal(); + if (IsNegative) + Val = -Val; + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), + WebAssemblyOperand::IntOp{Val})); + Parser.Lex(); + } + + bool parseSingleFloat(bool IsNegative, OperandVector &Operands) { + auto &Flt = Lexer.getTok(); + double Val; + if (Flt.getString().getAsDouble(Val, false)) + return error("Cannot parse real: ", Flt); + if (IsNegative) + Val = -Val; + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), + WebAssemblyOperand::FltOp{Val})); + Parser.Lex(); + return false; + } + + bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) { + if (Lexer.isNot(AsmToken::Identifier)) + return true; + auto &Flt = Lexer.getTok(); + auto S = Flt.getString(); + double Val; + if (S.compare_lower("infinity") == 0) { + Val = std::numeric_limits<double>::infinity(); + } else if (S.compare_lower("nan") == 0) { + Val = std::numeric_limits<double>::quiet_NaN(); + } else { + return true; + } + if (IsNegative) + Val = -Val; + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), + WebAssemblyOperand::FltOp{Val})); + Parser.Lex(); + return false; + } + + bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) { + // FIXME: there is probably a cleaner way to do this. 
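    // Illustrative examples of the syntax accepted here, using hypothetical
    // .s input (the mnemonics are real, the operand values are made up):
    //
    //   i32.load  16:p2align=1   # explicit offset and alignment
    //   i32.load  16             # alignment omitted: a -1 placeholder is
    //                            # added and patched to the default
    //                            # alignment after instruction matching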
+ auto IsLoadStore = InstName.find(".load") != StringRef::npos || + InstName.find(".store") != StringRef::npos; + auto IsAtomic = InstName.find("atomic.") != StringRef::npos; + if (IsLoadStore || IsAtomic) { + // Parse load/store operands of the form: offset:p2align=align + if (IsLoadStore && isNext(AsmToken::Colon)) { + auto Id = expectIdent(); + if (Id != "p2align") + return error("Expected p2align, instead got: " + Id); + if (expect(AsmToken::Equal, "=")) + return true; + if (!Lexer.is(AsmToken::Integer)) + return error("Expected integer constant"); + parseSingleInteger(false, Operands); + } else { + // Alignment not specified (or atomics, must use default alignment). + // We can't just call WebAssembly::GetDefaultP2Align since we don't have + // an opcode until after the assembly matcher, so set a default to fix + // up later. + auto Tok = Lexer.getTok(); + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(), + WebAssemblyOperand::IntOp{-1})); + } + } + return false; + } + + void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc, + WebAssembly::BlockType BT) { + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Integer, NameLoc, NameLoc, + WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)})); + } + + bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override { + // Note: Name does NOT point into the sourcecode, but to a local, so + // use NameLoc instead. + Name = StringRef(NameLoc.getPointer(), Name.size()); + + // WebAssembly has instructions with / in them, which AsmLexer parses + // as seperate tokens, so if we find such tokens immediately adjacent (no + // whitespace), expand the name to include them: + for (;;) { + auto &Sep = Lexer.getTok(); + if (Sep.getLoc().getPointer() != Name.end() || + Sep.getKind() != AsmToken::Slash) + break; + // Extend name with / + Name = StringRef(Name.begin(), Name.size() + Sep.getString().size()); + Parser.Lex(); + // We must now find another identifier, or error. + auto &Id = Lexer.getTok(); + if (Id.getKind() != AsmToken::Identifier || + Id.getLoc().getPointer() != Name.end()) + return error("Incomplete instruction name: ", Id); + Name = StringRef(Name.begin(), Name.size() + Id.getString().size()); + Parser.Lex(); + } + + // Now construct the name as first operand. + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), + WebAssemblyOperand::TokOp{Name})); + + // If this instruction is part of a control flow structure, ensure + // proper nesting. 
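    // For illustration, how the nesting stack evolves on a well-formed
    // (hypothetical) input fragment:
    //
    //   block         ; push(Block)            stack: [Function, Block]
    //     loop        ; push(Loop)             stack: [Function, Block, Loop]
    //     end_loop    ; pop("end_loop", Loop)  stack: [Function, Block]
    //   end_block     ; pop("end_block", Block)
    //   end_function  ; pop("end_function", Function); stack must now be empty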
+ bool ExpectBlockType = false; + bool ExpectFuncType = false; + if (Name == "block") { + push(Block); + ExpectBlockType = true; + } else if (Name == "loop") { + push(Loop); + ExpectBlockType = true; + } else if (Name == "try") { + push(Try); + ExpectBlockType = true; + } else if (Name == "if") { + push(If); + ExpectBlockType = true; + } else if (Name == "else") { + if (pop(Name, If)) + return true; + push(Else); + } else if (Name == "catch") { + if (pop(Name, Try)) + return true; + push(Try); + } else if (Name == "end_if") { + if (pop(Name, If, Else)) + return true; + } else if (Name == "end_try") { + if (pop(Name, Try)) + return true; + } else if (Name == "end_loop") { + if (pop(Name, Loop)) + return true; + } else if (Name == "end_block") { + if (pop(Name, Block)) + return true; + } else if (Name == "end_function") { + ensureLocals(getStreamer()); + CurrentState = EndFunction; + if (pop(Name, Function) || ensureEmptyNestingStack()) + return true; + } else if (Name == "call_indirect" || Name == "return_call_indirect") { + ExpectFuncType = true; + } + + if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) { + // This has a special TYPEINDEX operand which in text we + // represent as a signature, such that we can re-build this signature, + // attach it to an anonymous symbol, which is what WasmObjectWriter + // expects to be able to recreate the actual unique-ified type indices. + auto Loc = Parser.getTok(); + auto Signature = std::make_unique<wasm::WasmSignature>(); + if (parseSignature(Signature.get())) + return true; + // Got signature as block type, don't need more + ExpectBlockType = false; + auto &Ctx = getStreamer().getContext(); + // The "true" here will cause this to be a nameless symbol. + MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true); + auto *WasmSym = cast<MCSymbolWasm>(Sym); + WasmSym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(), + WebAssemblyOperand::SymOp{Expr})); + } + + while (Lexer.isNot(AsmToken::EndOfStatement)) { + auto &Tok = Lexer.getTok(); + switch (Tok.getKind()) { + case AsmToken::Identifier: { + if (!parseSpecialFloatMaybe(false, Operands)) + break; + auto &Id = Lexer.getTok(); + if (ExpectBlockType) { + // Assume this identifier is a block_type. + auto BT = parseBlockType(Id.getString()); + if (BT == WebAssembly::BlockType::Invalid) + return error("Unknown block type: ", Id); + addBlockTypeOperand(Operands, NameLoc, BT); + Parser.Lex(); + } else { + // Assume this identifier is a label. 
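        // (For illustration: in a hypothetical `call foo`, the identifier
        // becomes a symbol operand via parseExpression below. By contrast,
        // the call_indirect handling above turns a written signature such
        // as (i32, i32) -> (f64) into a nameless "typeindex" symbol that
        // the object writer later resolves to a unique type index.)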
+ const MCExpr *Val; + SMLoc End; + if (Parser.parseExpression(Val, End)) + return error("Cannot parse symbol: ", Lexer.getTok()); + Operands.push_back(std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), + WebAssemblyOperand::SymOp{Val})); + if (checkForP2AlignIfLoadStore(Operands, Name)) + return true; + } + break; + } + case AsmToken::Minus: + Parser.Lex(); + if (Lexer.is(AsmToken::Integer)) { + parseSingleInteger(true, Operands); + if (checkForP2AlignIfLoadStore(Operands, Name)) + return true; + } else if(Lexer.is(AsmToken::Real)) { + if (parseSingleFloat(true, Operands)) + return true; + } else if (!parseSpecialFloatMaybe(true, Operands)) { + } else { + return error("Expected numeric constant instead got: ", + Lexer.getTok()); + } + break; + case AsmToken::Integer: + parseSingleInteger(false, Operands); + if (checkForP2AlignIfLoadStore(Operands, Name)) + return true; + break; + case AsmToken::Real: { + if (parseSingleFloat(false, Operands)) + return true; + break; + } + case AsmToken::LCurly: { + Parser.Lex(); + auto Op = std::make_unique<WebAssemblyOperand>( + WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc()); + if (!Lexer.is(AsmToken::RCurly)) + for (;;) { + Op->BrL.List.push_back(Lexer.getTok().getIntVal()); + expect(AsmToken::Integer, "integer"); + if (!isNext(AsmToken::Comma)) + break; + } + expect(AsmToken::RCurly, "}"); + Operands.push_back(std::move(Op)); + break; + } + default: + return error("Unexpected token in operand: ", Tok); + } + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (expect(AsmToken::Comma, ",")) + return true; + } + } + if (ExpectBlockType && Operands.size() == 1) { + // Support blocks with no operands as default to void. + addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void); + } + Parser.Lex(); + return false; + } + + void onLabelParsed(MCSymbol *Symbol) override { + LastLabel = Symbol; + CurrentState = Label; + } + + bool parseSignature(wasm::WasmSignature *Signature) { + if (expect(AsmToken::LParen, "(")) + return true; + if (parseRegTypeList(Signature->Params)) + return true; + if (expect(AsmToken::RParen, ")")) + return true; + if (expect(AsmToken::MinusGreater, "->")) + return true; + if (expect(AsmToken::LParen, "(")) + return true; + if (parseRegTypeList(Signature->Returns)) + return true; + if (expect(AsmToken::RParen, ")")) + return true; + return false; + } + + bool CheckDataSection() { + if (CurrentState != DataSection) { + auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first); + if (WS && WS->getKind().isText()) + return error("data directive must occur in a data segment: ", + Lexer.getTok()); + } + CurrentState = DataSection; + return false; + } + + // This function processes wasm-specific directives streamed to + // WebAssemblyTargetStreamer, all others go to the generic parser + // (see WasmAsmParser). + bool ParseDirective(AsmToken DirectiveID) override { + // This function has a really weird return value behavior that is different + // from all the other parsing functions: + // - return true && no tokens consumed -> don't know this directive / let + // the generic parser handle it. + // - return true && tokens consumed -> a parsing error occurred. + // - return false -> processed this directive successfully. 
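  // For illustration, a hypothetical caller distinguishing the three
  // outcomes (a sketch, not the actual generic-parser code):
  //
  //   AsmToken Start = Lexer.getTok();
  //   if (!ParseDirective(ID))
  //     ; // directive handled successfully
  //   else if (Lexer.getTok().getLoc() != Start.getLoc())
  //     ; // tokens were consumed: a parse error was already reported
  //   else
  //     ; // unknown directive: fall back to the generic parser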
+ assert(DirectiveID.getKind() == AsmToken::Identifier); + auto &Out = getStreamer(); + auto &TOut = + reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer()); + auto &Ctx = Out.getContext(); + + // TODO: any time we return an error, at least one token must have been + // consumed, otherwise this will not signal an error to the caller. + if (DirectiveID.getString() == ".globaltype") { + auto SymName = expectIdent(); + if (SymName.empty()) + return true; + if (expect(AsmToken::Comma, ",")) + return true; + auto TypeTok = Lexer.getTok(); + auto TypeName = expectIdent(); + if (TypeName.empty()) + return true; + auto Type = parseType(TypeName); + if (!Type) + return error("Unknown type in .globaltype directive: ", TypeTok); + // Now set this symbol with the correct type. + auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + WasmSym->setGlobalType( + wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); + // And emit the directive again. + TOut.emitGlobalType(WasmSym); + return expect(AsmToken::EndOfStatement, "EOL"); + } + + if (DirectiveID.getString() == ".functype") { + // This code has to send things to the streamer similar to + // WebAssemblyAsmPrinter::EmitFunctionBodyStart. + // TODO: would be good to factor this into a common function, but the + // assembler and backend really don't share any common code, and this code + // parses the locals seperately. + auto SymName = expectIdent(); + if (SymName.empty()) + return true; + auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName)); + if (CurrentState == Label && WasmSym == LastLabel) { + // This .functype indicates a start of a function. + if (ensureEmptyNestingStack()) + return true; + CurrentState = FunctionStart; + LastFunctionLabel = LastLabel; + push(Function); + } + auto Signature = std::make_unique<wasm::WasmSignature>(); + if (parseSignature(Signature.get())) + return true; + WasmSym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + TOut.emitFunctionType(WasmSym); + // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. + return expect(AsmToken::EndOfStatement, "EOL"); + } + + if (DirectiveID.getString() == ".eventtype") { + auto SymName = expectIdent(); + if (SymName.empty()) + return true; + auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName)); + auto Signature = std::make_unique<wasm::WasmSignature>(); + if (parseRegTypeList(Signature->Params)) + return true; + WasmSym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT); + TOut.emitEventType(WasmSym); + // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 
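  // For illustration, the directive forms handled in this function accept
  // hypothetical input such as:
  //
  //   .globaltype __stack_pointer, i32
  //   .functype   foo (i32, i64) -> (f32)
  //   .eventtype  __cpp_exception i32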
+ return expect(AsmToken::EndOfStatement, "EOL"); + } + + if (DirectiveID.getString() == ".local") { + if (CurrentState != FunctionStart) + return error(".local directive should follow the start of a function", + Lexer.getTok()); + SmallVector<wasm::ValType, 4> Locals; + if (parseRegTypeList(Locals)) + return true; + TOut.emitLocal(Locals); + CurrentState = FunctionLocals; + return expect(AsmToken::EndOfStatement, "EOL"); + } + + if (DirectiveID.getString() == ".int8" || + DirectiveID.getString() == ".int16" || + DirectiveID.getString() == ".int32" || + DirectiveID.getString() == ".int64") { + if (CheckDataSection()) return true; + const MCExpr *Val; + SMLoc End; + if (Parser.parseExpression(Val, End)) + return error("Cannot parse .int expression: ", Lexer.getTok()); + size_t NumBits = 0; + DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits); + Out.EmitValue(Val, NumBits / 8, End); + return expect(AsmToken::EndOfStatement, "EOL"); + } + + if (DirectiveID.getString() == ".asciz") { + if (CheckDataSection()) return true; + std::string S; + if (Parser.parseEscapedString(S)) + return error("Cannot parse string constant: ", Lexer.getTok()); + Out.EmitBytes(StringRef(S.c_str(), S.length() + 1)); + return expect(AsmToken::EndOfStatement, "EOL"); + } + + return true; // We didn't process this directive. + } + + // Called either when the first instruction is parsed of the function ends. + void ensureLocals(MCStreamer &Out) { + if (CurrentState == FunctionStart) { + // We haven't seen a .local directive yet. The streamer requires locals to + // be encoded as a prelude to the instructions, so emit an empty list of + // locals here. + auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>( + *Out.getTargetStreamer()); + TOut.emitLocal(SmallVector<wasm::ValType, 0>()); + CurrentState = FunctionLocals; + } + } + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override { + MCInst Inst; + Inst.setLoc(IDLoc); + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: { + ensureLocals(Out); + // Fix unknown p2align operands. + auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode()); + if (Align != -1U) { + auto &Op0 = Inst.getOperand(0); + if (Op0.getImm() == -1) + Op0.setImm(Align); + } + Out.EmitInstruction(Inst, getSTI()); + if (CurrentState == EndFunction) { + onEndOfFunction(); + } else { + CurrentState = Instructions; + } + return false; + } + case Match_MissingFeature: + return Parser.Error( + IDLoc, "instruction requires a WASM feature not currently enabled"); + case Match_MnemonicFail: + return Parser.Error(IDLoc, "invalid instruction"); + case Match_NearMisses: + return Parser.Error(IDLoc, "ambiguous instruction"); + case Match_InvalidTiedOperand: + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Parser.Error(IDLoc, "too few operands for instruction"); + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Parser.Error(ErrorLoc, "invalid operand for instruction"); + } + } + llvm_unreachable("Implement any new match types added!"); + } + + void doBeforeLabelEmit(MCSymbol *Symbol) override { + // Start a new section for the next function automatically, since our + // object writer expects each function to have its own section. 
This way + // The user can't forget this "convention". + auto SymName = Symbol->getName(); + if (SymName.startswith(".L")) + return; // Local Symbol. + // Only create a new text section if we're already in one. + auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first); + if (!CWS || !CWS->getKind().isText()) + return; + auto SecName = ".text." + SymName; + auto WS = getContext().getWasmSection(SecName, SectionKind::getText()); + getStreamer().SwitchSection(WS); + } + + void onEndOfFunction() { + // Automatically output a .size directive, so it becomes optional for the + // user. + if (!LastFunctionLabel) return; + auto TempSym = getContext().createLinkerPrivateTempSymbol(); + getStreamer().EmitLabel(TempSym); + auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext()); + auto End = MCSymbolRefExpr::create(TempSym, getContext()); + auto Expr = + MCBinaryExpr::create(MCBinaryExpr::Sub, End, Start, getContext()); + getStreamer().emitELFSize(LastFunctionLabel, Expr); + } + + void onEndOfFile() override { ensureEmptyNestingStack(); } +}; +} // end anonymous namespace + +// Force static initialization. +extern "C" void LLVMInitializeWebAssemblyAsmParser() { + RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32()); + RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "WebAssemblyGenAsmMatcher.inc" diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp new file mode 100644 index 000000000000..9a9c31cff2d5 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -0,0 +1,298 @@ +//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file is part of the WebAssembly Disassembler. +/// +/// It contains code to translate the data produced by the decoder into +/// MCInsts. 
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "TargetInfo/WebAssemblyTargetInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-disassembler" + +using DecodeStatus = MCDisassembler::DecodeStatus; + +#include "WebAssemblyGenDisassemblerTables.inc" + +namespace { +static constexpr int WebAssemblyInstructionTableSize = 256; + +class WebAssemblyDisassembler final : public MCDisassembler { + std::unique_ptr<const MCInstrInfo> MCII; + + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + +public: + WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + std::unique_ptr<const MCInstrInfo> MCII) + : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} +}; +} // end anonymous namespace + +static MCDisassembler *createWebAssemblyDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); + return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); +} + +extern "C" void LLVMInitializeWebAssemblyDisassembler() { + // Register the disassembler for each target. + TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), + createWebAssemblyDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), + createWebAssemblyDisassembler); +} + +static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { + if (Size >= Bytes.size()) + return -1; + auto V = Bytes[Size]; + Size++; + return V; +} + +static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, + bool Signed) { + unsigned N = 0; + const char *Error = nullptr; + Val = Signed ? 
decodeSLEB128(Bytes.data() + Size, &N, + Bytes.data() + Bytes.size(), &Error) + : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, + Bytes.data() + Bytes.size(), + &Error)); + if (Error) + return false; + Size += N; + return true; +} + +static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, bool Signed) { + int64_t Val; + if (!nextLEB(Val, Bytes, Size, Signed)) + return false; + MI.addOperand(MCOperand::createImm(Val)); + return true; +} + +template <typename T> +bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { + if (Size + sizeof(T) > Bytes.size()) + return false; + T Val = support::endian::read<T, support::endianness::little, 1>( + Bytes.data() + Size); + Size += sizeof(T); + if (std::is_floating_point<T>::value) { + MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); + } else { + MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); + } + return true; +} + +MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( + StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream) const { + Size = 0; + if (Address == 0) { + // Start of a code section: we're parsing only the function count. + int64_t FunctionCount; + if (!nextLEB(FunctionCount, Bytes, Size, false)) + return MCDisassembler::Fail; + outs() << " # " << FunctionCount << " functions in section."; + } else { + // Parse the start of a single function. + int64_t BodySize, LocalEntryCount; + if (!nextLEB(BodySize, Bytes, Size, false) || + !nextLEB(LocalEntryCount, Bytes, Size, false)) + return MCDisassembler::Fail; + if (LocalEntryCount) { + outs() << " .local "; + for (int64_t I = 0; I < LocalEntryCount; I++) { + int64_t Count, Type; + if (!nextLEB(Count, Bytes, Size, false) || + !nextLEB(Type, Bytes, Size, false)) + return MCDisassembler::Fail; + for (int64_t J = 0; J < Count; J++) { + if (I || J) + outs() << ", "; + outs() << WebAssembly::anyTypeToString(Type); + } + } + } + } + outs() << "\n"; + return MCDisassembler::Success; +} + +MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( + MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, + raw_ostream & /*OS*/, raw_ostream &CS) const { + CommentStream = &CS; + Size = 0; + int Opc = nextByte(Bytes, Size); + if (Opc < 0) + return MCDisassembler::Fail; + const auto *WasmInst = &InstructionTable0[Opc]; + // If this is a prefix byte, indirect to another table. + if (WasmInst->ET == ET_Prefix) { + WasmInst = nullptr; + // Linear search, so far only 2 entries. + for (auto PT = PrefixTable; PT->Table; PT++) { + if (PT->Prefix == Opc) { + WasmInst = PT->Table; + break; + } + } + if (!WasmInst) + return MCDisassembler::Fail; + int64_t PrefixedOpc; + if (!nextLEB(PrefixedOpc, Bytes, Size, false)) + return MCDisassembler::Fail; + if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) + return MCDisassembler::Fail; + WasmInst += PrefixedOpc; + } + if (WasmInst->ET == ET_Unused) + return MCDisassembler::Fail; + // At this point we must have a valid instruction to decode. + assert(WasmInst->ET == ET_Instruction); + MI.setOpcode(WasmInst->Opcode); + // Parse any operands. 
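  // A worked LEB128 example for illustration (standard encoding, not
  // specific to this patch): the unsigned bytes 0xE5 0x8E 0x26 decode as
  //   0x65 | (0x0E << 7) | (0x26 << 14) = 624485,
  // while the single byte 0x7F decodes to -1 under signed (SLEB128) rules.
  // Relocatable immediates are emitted padded (5 bytes for 32-bit, 10 for
  // 64-bit values) so that fixups can later patch them in place.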
+ for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { + auto OT = OperandTable[WasmInst->OperandStart + OPI]; + switch (OT) { + // ULEB operands: + case WebAssembly::OPERAND_BASIC_BLOCK: + case WebAssembly::OPERAND_LOCAL: + case WebAssembly::OPERAND_GLOBAL: + case WebAssembly::OPERAND_FUNCTION32: + case WebAssembly::OPERAND_OFFSET32: + case WebAssembly::OPERAND_P2ALIGN: + case WebAssembly::OPERAND_TYPEINDEX: + case WebAssembly::OPERAND_EVENT: + case MCOI::OPERAND_IMMEDIATE: { + if (!parseLEBImmediate(MI, Size, Bytes, false)) + return MCDisassembler::Fail; + break; + } + // SLEB operands: + case WebAssembly::OPERAND_I32IMM: + case WebAssembly::OPERAND_I64IMM: { + if (!parseLEBImmediate(MI, Size, Bytes, true)) + return MCDisassembler::Fail; + break; + } + // block_type operands: + case WebAssembly::OPERAND_SIGNATURE: { + int64_t Val; + uint64_t PrevSize = Size; + if (!nextLEB(Val, Bytes, Size, true)) + return MCDisassembler::Fail; + if (Val < 0) { + // Negative values are single septet value types or empty types + if (Size != PrevSize + 1) { + MI.addOperand( + MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); + } else { + MI.addOperand(MCOperand::createImm(Val & 0x7f)); + } + } else { + // We don't have access to the signature, so create a symbol without one + MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); + auto *WasmSym = cast<MCSymbolWasm>(Sym); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); + MI.addOperand(MCOperand::createExpr(Expr)); + } + break; + } + // FP operands. + case WebAssembly::OPERAND_F32IMM: { + if (!parseImmediate<float>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_F64IMM: { + if (!parseImmediate<double>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + // Vector lane operands (not LEB encoded). + case WebAssembly::OPERAND_VEC_I8IMM: { + if (!parseImmediate<uint8_t>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_VEC_I16IMM: { + if (!parseImmediate<uint16_t>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_VEC_I32IMM: { + if (!parseImmediate<uint32_t>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_VEC_I64IMM: { + if (!parseImmediate<uint64_t>(MI, Size, Bytes)) + return MCDisassembler::Fail; + break; + } + case WebAssembly::OPERAND_BRLIST: { + int64_t TargetTableLen; + if (!nextLEB(TargetTableLen, Bytes, Size, false)) + return MCDisassembler::Fail; + for (int64_t I = 0; I < TargetTableLen; I++) { + if (!parseLEBImmediate(MI, Size, Bytes, false)) + return MCDisassembler::Fail; + } + // Default case. + if (!parseLEBImmediate(MI, Size, Bytes, false)) + return MCDisassembler::Fail; + break; + } + case MCOI::OPERAND_REGISTER: + // The tablegen header currently does not have any register operands since + // we use only the stack (_S) instructions. + // If you hit this that probably means a bad instruction definition in + // tablegen. 
+ llvm_unreachable("Register operand in WebAssemblyDisassembler"); + default: + llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); + } + } + return MCDisassembler::Success; +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp new file mode 100644 index 000000000000..8314de41021f --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -0,0 +1,135 @@ +//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the WebAssemblyAsmBackend class. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCWasmObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +class WebAssemblyAsmBackend final : public MCAsmBackend { + bool Is64Bit; + bool IsEmscripten; + +public: + explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten) + : MCAsmBackend(support::little), Is64Bit(Is64Bit), + IsEmscripten(IsEmscripten) {} + + unsigned getNumFixupKinds() const override { + return WebAssembly::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel, + const MCSubtargetInfo *STI) const override; + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override; + + // No instruction requires relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return false; + } + + bool mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const override { + return false; + } + + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override {} + + bool writeNopData(raw_ostream &OS, uint64_t Count) const override; +}; + +const MCFixupKindInfo & +WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // WebAssemblyFixupKinds.h. 
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_sleb128_i32", 0, 5 * 8, 0}, + {"fixup_sleb128_i64", 0, 10 * 8, 0}, + {"fixup_uleb128_i32", 0, 5 * 8, 0}, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS, + uint64_t Count) const { + for (uint64_t I = 0; I < Count; ++I) + OS << char(WebAssembly::Nop); + + return true; +} + +void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel, + const MCSubtargetInfo *STI) const { + const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); + assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); + + unsigned NumBytes = alignTo(Info.TargetSize, 8) / 8; + if (Value == 0) + return; // Doesn't change encoding. + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned I = 0; I != NumBytes; ++I) + Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff); +} + +std::unique_ptr<MCObjectTargetWriter> +WebAssemblyAsmBackend::createObjectTargetWriter() const { + return createWebAssemblyWasmObjectWriter(Is64Bit, IsEmscripten); +} + +} // end anonymous namespace + +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) { + return new WebAssemblyAsmBackend(TT.isArch64Bit(), TT.isOSEmscripten()); +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h new file mode 100644 index 000000000000..33e8de282955 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h @@ -0,0 +1,28 @@ +//=- WebAssemblyFixupKinds.h - WebAssembly Specific Fixup Entries -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace WebAssembly { +enum Fixups { + fixup_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed + fixup_sleb128_i64, // 64-bit signed + fixup_uleb128_i32, // 32-bit unsigned + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // end namespace WebAssembly +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp new file mode 100644 index 000000000000..221ac17b8336 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -0,0 +1,339 @@ +//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Print MCInst instructions to wasm format. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyUtilities.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#include "WebAssemblyGenAsmWriter.inc" + +WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + +void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, + unsigned RegNo) const { + assert(RegNo != WebAssemblyFunctionInfo::UnusedReg); + // Note that there's an implicit local.get/local.set here! + OS << "$" << RegNo; +} + +void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, + const MCSubtargetInfo &STI) { + // Print the instruction (this uses the AsmStrings from the .td files). + printInstruction(MI, OS); + + // Print any additional variadic operands. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.isVariadic()) { + if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) + OS << "\t"; + for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) { + // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because + // we have an extra flags operand which is not currently printed, for + // compatiblity reasons. + if (I != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID && + MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) || + I != Desc.getNumOperands())) + OS << ", "; + printOperand(MI, I, OS); + } + } + + // Print any added annotation. + printAnnotation(OS, Annot); + + if (CommentStream) { + // Observe any effects on the control flow stack, for use in annotating + // control flow label references. 
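    // For illustration, the annotations this produces on a hypothetical
    // disassembly (values made up):
    //
    //   block          # (entry 0 pushed; blocks are labeled at their end)
    //     loop         # label1:
    //       br 0       # 0: up to label1
    //       br 1       # 1: down to label0
    //     end_loop     # (pops entry 1; loops get no end annotation)
    //   end_block      # label0: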
+ unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + break; + + case WebAssembly::LOOP: + case WebAssembly::LOOP_S: + printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':'); + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true)); + break; + + case WebAssembly::BLOCK: + case WebAssembly::BLOCK_S: + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + break; + + case WebAssembly::TRY: + case WebAssembly::TRY_S: + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); + EHPadStack.push_back(EHPadStackCounter++); + LastSeenEHInst = TRY; + break; + + case WebAssembly::END_LOOP: + case WebAssembly::END_LOOP_S: + if (ControlFlowStack.empty()) { + printAnnotation(OS, "End marker mismatch!"); + } else { + ControlFlowStack.pop_back(); + } + break; + + case WebAssembly::END_BLOCK: + case WebAssembly::END_BLOCK_S: + if (ControlFlowStack.empty()) { + printAnnotation(OS, "End marker mismatch!"); + } else { + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + } + break; + + case WebAssembly::END_TRY: + case WebAssembly::END_TRY_S: + if (ControlFlowStack.empty()) { + printAnnotation(OS, "End marker mismatch!"); + } else { + printAnnotation( + OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); + LastSeenEHInst = END_TRY; + } + break; + + case WebAssembly::CATCH: + case WebAssembly::CATCH_S: + if (EHPadStack.empty()) { + printAnnotation(OS, "try-catch mismatch!"); + } else { + printAnnotation(OS, "catch" + utostr(EHPadStack.pop_back_val()) + ':'); + } + break; + } + + // Annotate any control flow label references. + + // rethrow instruction does not take any depth argument and rethrows to the + // nearest enclosing catch scope, if any. If there's no enclosing catch + // scope, it throws up to the caller. + if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) { + if (EHPadStack.empty()) { + printAnnotation(OS, "to caller"); + } else { + printAnnotation(OS, "down to catch" + utostr(EHPadStack.back())); + } + + } else { + unsigned NumFixedOperands = Desc.NumOperands; + SmallSet<uint64_t, 8> Printed; + for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) { + // See if this operand denotes a basic block target. + if (I < NumFixedOperands) { + // A non-variable_ops operand, check its type. + if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK) + continue; + } else { + // A variable_ops operand, which currently can be immediates (used in + // br_table) which are basic block targets, or for call instructions + // when using -wasm-keep-registers (in which case they are registers, + // and should not be processed). + if (!MI->getOperand(I).isImm()) + continue; + } + uint64_t Depth = MI->getOperand(I).getImm(); + if (!Printed.insert(Depth).second) + continue; + if (Depth >= ControlFlowStack.size()) { + printAnnotation(OS, "Invalid depth argument!"); + } else { + const auto &Pair = ControlFlowStack.rbegin()[Depth]; + printAnnotation(OS, utostr(Depth) + ": " + + (Pair.second ? "up" : "down") + " to label" + + utostr(Pair.first)); + } + } + } + } +} + +static std::string toString(const APFloat &FP) { + // Print NaNs with custom payloads specially. + if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) && + !FP.bitwiseIsEqual( + APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) { + APInt AI = FP.bitcastToAPInt(); + return std::string(AI.isNegative() ? 
"-" : "") + "nan:0x" + + utohexstr(AI.getZExtValue() & + (AI.getBitWidth() == 32 ? INT64_C(0x007fffff) + : INT64_C(0x000fffffffffffff)), + /*LowerCase=*/true); + } + + // Use C99's hexadecimal floating-point representation. + static const size_t BufBytes = 128; + char Buf[BufBytes]; + auto Written = FP.convertToHexString( + Buf, /*HexDigits=*/0, /*UpperCase=*/false, APFloat::rmNearestTiesToEven); + (void)Written; + assert(Written != 0); + assert(Written < BufBytes); + return Buf; +} + +void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned WAReg = Op.getReg(); + if (int(WAReg) >= 0) + printRegName(O, WAReg); + else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs()) + O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg); + else if (WAReg != WebAssemblyFunctionInfo::UnusedReg) + O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg); + else + O << "$drop"; + // Add a '=' suffix if this is a def. + if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) + O << '='; + } else if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + const MCOperandInfo &Info = Desc.OpInfo[OpNo]; + if (Info.OperandType == WebAssembly::OPERAND_F32IMM) { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + O << ::toString(APFloat(float(Op.getFPImm()))); + } else { + assert(Info.OperandType == WebAssembly::OPERAND_F64IMM); + O << ::toString(APFloat(Op.getFPImm())); + } + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + // call_indirect instructions have a TYPEINDEX operand that we print + // as a signature here, such that the assembler can recover this + // information. + auto SRE = static_cast<const MCSymbolRefExpr *>(Op.getExpr()); + if (SRE->getKind() == MCSymbolRefExpr::VK_WASM_TYPEINDEX) { + auto &Sym = static_cast<const MCSymbolWasm &>(SRE->getSymbol()); + O << WebAssembly::signatureToString(Sym.getSignature()); + } else { + Op.getExpr()->print(O, &MAI); + } + } +} + +void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << "{"; + for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) { + if (I != OpNo) + O << ", "; + O << MI->getOperand(I).getImm(); + } + O << "}"; +} + +void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + int64_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode())) + return; + O << ":p2align=" << Imm; +} + +void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + auto Imm = static_cast<unsigned>(Op.getImm()); + if (Imm != wasm::WASM_TYPE_NORESULT) + O << WebAssembly::anyTypeToString(Imm); + } else { + auto Expr = cast<MCSymbolRefExpr>(Op.getExpr()); + auto *Sym = cast<MCSymbolWasm>(&Expr->getSymbol()); + if (Sym->getSignature()) { + O << WebAssembly::signatureToString(Sym->getSignature()); + } else { + // Disassembler does not currently produce a signature + O << "unknown_type"; + } + } +} + +// We have various enums representing a subset of these types, use this +// function to convert any of them to text. 
+const char *WebAssembly::anyTypeToString(unsigned Ty) { + switch (Ty) { + case wasm::WASM_TYPE_I32: + return "i32"; + case wasm::WASM_TYPE_I64: + return "i64"; + case wasm::WASM_TYPE_F32: + return "f32"; + case wasm::WASM_TYPE_F64: + return "f64"; + case wasm::WASM_TYPE_V128: + return "v128"; + case wasm::WASM_TYPE_FUNCREF: + return "funcref"; + case wasm::WASM_TYPE_FUNC: + return "func"; + case wasm::WASM_TYPE_EXNREF: + return "exnref"; + case wasm::WASM_TYPE_NORESULT: + return "void"; + default: + return "invalid_type"; + } +} + +const char *WebAssembly::typeToString(wasm::ValType Ty) { + return anyTypeToString(static_cast<unsigned>(Ty)); +} + +std::string WebAssembly::typeListToString(ArrayRef<wasm::ValType> List) { + std::string S; + for (auto &Ty : List) { + if (&Ty != &List[0]) S += ", "; + S += WebAssembly::typeToString(Ty); + } + return S; +} + +std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) { + std::string S("("); + S += typeListToString(Sig->Params); + S += ") -> ("; + S += typeListToString(Sig->Returns); + S += ")"; + return S; +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h new file mode 100644 index 000000000000..cf37778099a0 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -0,0 +1,68 @@ +// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This class prints an WebAssembly MCInst to wasm file syntax. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/MachineValueType.h" + +namespace llvm { + +class MCSubtargetInfo; + +class WebAssemblyInstPrinter final : public MCInstPrinter { + uint64_t ControlFlowCounter = 0; + uint64_t EHPadStackCounter = 0; + SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack; + SmallVector<uint64_t, 4> EHPadStack; + + enum EHInstKind { TRY, CATCH, END_TRY }; + EHInstKind LastSeenEHInst = END_TRY; + +public: + WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI); + + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; + + // Used by tblegen code. + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); +}; + +namespace WebAssembly { + +const char *typeToString(wasm::ValType Ty); +const char *anyTypeToString(unsigned Ty); + +std::string typeListToString(ArrayRef<wasm::ValType> List); +std::string signatureToString(const wasm::WasmSignature *Sig); + +} // end namespace WebAssembly + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp new file mode 100644 index 000000000000..8f6531563e1b --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -0,0 +1,47 @@ +//===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declarations of the WebAssemblyMCAsmInfo +/// properties. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-mc-asm-info" + +WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor. + +WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; + + // TODO: What should MaxInstLength be? + + UseDataRegionDirectives = true; + + // Use .skip instead of .zero because .zero is confusing when used with two + // arguments (it doesn't actually zero things out). + ZeroDirective = "\t.skip\t"; + + Data8bitsDirective = "\t.int8\t"; + Data16bitsDirective = "\t.int16\t"; + Data32bitsDirective = "\t.int32\t"; + Data64bitsDirective = "\t.int64\t"; + + AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + + SupportsDebugInformation = true; + + // TODO: UseIntegratedAssembler? +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h new file mode 100644 index 000000000000..9efbbf881f59 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h @@ -0,0 +1,31 @@ +//===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the WebAssemblyMCAsmInfo class. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H + +#include "llvm/MC/MCAsmInfoWasm.h" + +namespace llvm { + +class Triple; + +class WebAssemblyMCAsmInfo final : public MCAsmInfoWasm { +public: + explicit WebAssemblyMCAsmInfo(const Triple &T); + ~WebAssemblyMCAsmInfo() override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp new file mode 100644 index 000000000000..1a4c57e66d2f --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -0,0 +1,176 @@ +//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the WebAssemblyMCCodeEmitter class. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); +STATISTIC(MCNumFixups, "Number of MC fixups created."); + +namespace { +class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { + const MCInstrInfo &MCII; + + // Implementation generated by tablegen. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) : MCII(MCII) {} +}; +} // end anonymous namespace + +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { + return new WebAssemblyMCCodeEmitter(MCII); +} + +void WebAssemblyMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Start = OS.tell(); + + uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); + if (Binary <= UINT8_MAX) { + OS << uint8_t(Binary); + } else { + assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet"); + OS << uint8_t(Binary >> 8); + encodeULEB128(uint8_t(Binary), OS); + } + + // For br_table instructions, encode the size of the table. In the MCInst, + // there's an index operand (if not a stack instruction), one operand for + // each table entry, and the default operand. 
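+  // Illustrative example (a hypothetical instruction, not taken from the
+  // source): a register-form BR_TABLE_I32 with three table entries carries an
+  // index operand, the three entries, and the default target, i.e. five
+  // operands, so the table size emitted below is 5 - 2 = 3; the stack form
+  // omits the index operand, giving 4 - 1 = 3.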
+  if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S ||
+      MI.getOpcode() == WebAssembly::BR_TABLE_I64_S)
+    encodeULEB128(MI.getNumOperands() - 1, OS);
+  if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
+      MI.getOpcode() == WebAssembly::BR_TABLE_I64)
+    encodeULEB128(MI.getNumOperands() - 2, OS);
+
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+    const MCOperand &MO = MI.getOperand(I);
+    if (MO.isReg()) {
+      /* nothing to encode */
+
+    } else if (MO.isImm()) {
+      if (I < Desc.getNumOperands()) {
+        const MCOperandInfo &Info = Desc.OpInfo[I];
+        LLVM_DEBUG(dbgs() << "Encoding immediate: type="
+                          << int(Info.OperandType) << "\n");
+        switch (Info.OperandType) {
+        case WebAssembly::OPERAND_I32IMM:
+          encodeSLEB128(int32_t(MO.getImm()), OS);
+          break;
+        case WebAssembly::OPERAND_OFFSET32:
+          encodeULEB128(uint32_t(MO.getImm()), OS);
+          break;
+        case WebAssembly::OPERAND_I64IMM:
+          encodeSLEB128(int64_t(MO.getImm()), OS);
+          break;
+        case WebAssembly::OPERAND_SIGNATURE:
+          OS << uint8_t(MO.getImm());
+          break;
+        case WebAssembly::OPERAND_VEC_I8IMM:
+          support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
+          break;
+        case WebAssembly::OPERAND_VEC_I16IMM:
+          support::endian::write<uint16_t>(OS, MO.getImm(), support::little);
+          break;
+        case WebAssembly::OPERAND_VEC_I32IMM:
+          support::endian::write<uint32_t>(OS, MO.getImm(), support::little);
+          break;
+        case WebAssembly::OPERAND_VEC_I64IMM:
+          support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
+          break;
+        case WebAssembly::OPERAND_GLOBAL:
+          llvm_unreachable("wasm globals should only be accessed symbolically");
+        default:
+          encodeULEB128(uint64_t(MO.getImm()), OS);
+        }
+      } else {
+        encodeULEB128(uint64_t(MO.getImm()), OS);
+      }
+
+    } else if (MO.isFPImm()) {
+      const MCOperandInfo &Info = Desc.OpInfo[I];
+      if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
+        // TODO: MC converts all floating point immediate operands to double.
+        // This is fine for numeric values, but may cause NaNs to change bits.
+        auto F = float(MO.getFPImm());
+        support::endian::write<float>(OS, F, support::little);
+      } else {
+        assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
+        double D = MO.getFPImm();
+        support::endian::write<double>(OS, D, support::little);
+      }
+
+    } else if (MO.isExpr()) {
+      const MCOperandInfo &Info = Desc.OpInfo[I];
+      llvm::MCFixupKind FixupKind;
+      size_t PaddedSize = 5;
+      switch (Info.OperandType) {
+      case WebAssembly::OPERAND_I32IMM:
+        FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32);
+        break;
+      case WebAssembly::OPERAND_I64IMM:
+        FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64);
+        PaddedSize = 10;
+        break;
+      case WebAssembly::OPERAND_FUNCTION32:
+      case WebAssembly::OPERAND_OFFSET32:
+      case WebAssembly::OPERAND_SIGNATURE:
+      case WebAssembly::OPERAND_TYPEINDEX:
+      case WebAssembly::OPERAND_GLOBAL:
+      case WebAssembly::OPERAND_EVENT:
+        FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
+        break;
+      default:
+        llvm_unreachable("unexpected symbolic operand kind");
+      }
+      Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(),
+                                       FixupKind, MI.getLoc()));
+      ++MCNumFixups;
+      encodeULEB128(0, OS, PaddedSize);
+    } else {
+      llvm_unreachable("unexpected operand kind");
+    }
+  }
+
+  ++MCNumEmitted; // Keep track of the number of MCInsts emitted.
+} + +#include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp new file mode 100644 index 000000000000..9c8ca1f13b18 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -0,0 +1,154 @@ +//===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides WebAssembly-specific target descriptions. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "MCTargetDesc/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCAsmInfo.h" +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" +#include "TargetInfo/WebAssemblyTargetInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-mc-target-desc" + +#define GET_INSTRINFO_MC_DESC +#include "WebAssemblyGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "WebAssemblyGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "WebAssemblyGenRegisterInfo.inc" + +static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, + const Triple &TT) { + return new WebAssemblyMCAsmInfo(TT); +} + +static MCInstrInfo *createMCInstrInfo() { + auto *X = new MCInstrInfo(); + InitWebAssemblyMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) { + auto *X = new MCRegisterInfo(); + InitWebAssemblyMCRegisterInfo(X, 0); + return X; +} + +static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + assert(SyntaxVariant == 0 && "WebAssembly only has one syntax variant"); + return new WebAssemblyInstPrinter(MAI, MII, MRI); +} + +static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo & /*MRI*/, + MCContext &Ctx) { + return createWebAssemblyMCCodeEmitter(MCII); +} + +static MCAsmBackend *createAsmBackend(const Target & /*T*/, + const MCSubtargetInfo &STI, + const MCRegisterInfo & /*MRI*/, + const MCTargetOptions & /*Options*/) { + return createWebAssemblyAsmBackend(STI.getTargetTriple()); +} + +static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, + StringRef FS) { + return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS); +} + +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new WebAssemblyTargetWasmStreamer(S); +} + +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter * /*InstPrint*/, + bool /*isVerboseAsm*/) { + return new WebAssemblyTargetAsmStreamer(S, OS); +} + +static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) { + return new WebAssemblyTargetNullStreamer(S); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeWebAssemblyTargetMC() { + for (Target *T : + {&getTheWebAssemblyTarget32(), &getTheWebAssemblyTarget64()}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createMCAsmInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createMCInstPrinter); + + // Register the MC code emitter. + TargetRegistry::RegisterMCCodeEmitter(*T, createCodeEmitter); + + // Register the ASM Backend. + TargetRegistry::RegisterMCAsmBackend(*T, createAsmBackend); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createMCSubtargetInfo); + + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); + // Register the null target streamer. + TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer); + } +} + +wasm::ValType WebAssembly::toValType(const MVT &Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return wasm::ValType::I32; + case MVT::i64: + return wasm::ValType::I64; + case MVT::f32: + return wasm::ValType::F32; + case MVT::f64: + return wasm::ValType::F64; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + return wasm::ValType::V128; + case MVT::exnref: + return wasm::ValType::EXNREF; + default: + llvm_unreachable("unexpected type"); + } +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h new file mode 100644 index 000000000000..b339860a381d --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -0,0 +1,596 @@ +//==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides WebAssembly-specific target descriptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H + +#include "../WebAssemblySubtarget.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/DataTypes.h" +#include <memory> + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCSubtargetInfo; +class MVT; +class Target; +class Triple; +class raw_pwrite_stream; + +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); + +MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); + +std::unique_ptr<MCObjectTargetWriter> +createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); + +namespace WebAssembly { +enum OperandType { + /// Basic block label in a branch construct. + OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, + /// Local index. 
+  OPERAND_LOCAL,
+  /// Global index.
+  OPERAND_GLOBAL,
+  /// 32-bit integer immediates.
+  OPERAND_I32IMM,
+  /// 64-bit integer immediates.
+  OPERAND_I64IMM,
+  /// 32-bit floating-point immediates.
+  OPERAND_F32IMM,
+  /// 64-bit floating-point immediates.
+  OPERAND_F64IMM,
+  /// 8-bit vector lane immediate.
+  OPERAND_VEC_I8IMM,
+  /// 16-bit vector lane immediate.
+  OPERAND_VEC_I16IMM,
+  /// 32-bit vector lane immediate.
+  OPERAND_VEC_I32IMM,
+  /// 64-bit vector lane immediate.
+  OPERAND_VEC_I64IMM,
+  /// 32-bit unsigned function indices.
+  OPERAND_FUNCTION32,
+  /// 32-bit unsigned memory offsets.
+  OPERAND_OFFSET32,
+  /// p2align immediate for load and store address alignment.
+  OPERAND_P2ALIGN,
+  /// signature immediate for block/loop.
+  OPERAND_SIGNATURE,
+  /// type signature immediate for call_indirect.
+  OPERAND_TYPEINDEX,
+  /// Event index.
+  OPERAND_EVENT,
+  /// A list of branch targets for br_list.
+  OPERAND_BRLIST,
+};
+} // end namespace WebAssembly
+
+namespace WebAssemblyII {
+
+/// Target Operand Flag enum.
+enum TOF {
+  MO_NO_FLAG = 0,
+
+  // On a symbol operand this indicates that the immediate is a wasm global
+  // index. The value of the wasm global will be set to the symbol address at
+  // runtime. This adds a level of indirection similar to the GOT on native
+  // platforms.
+  MO_GOT,
+
+  // On a symbol operand this indicates that the immediate is the symbol
+  // address relative to the __memory_base wasm global.
+  // Only applicable to data symbols.
+  MO_MEMORY_BASE_REL,
+
+  // On a symbol operand this indicates that the immediate is the symbol
+  // address relative to the __table_base wasm global.
+  // Only applicable to function symbols.
+  MO_TABLE_BASE_REL,
+};
+
+} // end namespace WebAssemblyII
+
+} // end namespace llvm
+
+// Defines symbolic names for WebAssembly registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "WebAssemblyGenRegisterInfo.inc"
+
+// Defines symbolic names for the WebAssembly instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
+namespace llvm {
+namespace WebAssembly {
+
+/// Used as immediate MachineOperands for block signatures.
+enum class BlockType : unsigned {
+  Invalid = 0x00,
+  Void = 0x40,
+  I32 = unsigned(wasm::ValType::I32),
+  I64 = unsigned(wasm::ValType::I64),
+  F32 = unsigned(wasm::ValType::F32),
+  F64 = unsigned(wasm::ValType::F64),
+  V128 = unsigned(wasm::ValType::V128),
+  Exnref = unsigned(wasm::ValType::EXNREF),
+  // Multivalue blocks (and other non-void blocks) are only emitted when the
+  // blocks will never be exited and are at the ends of functions (see
+  // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made
+  // to pop values off the stack, so the exact multivalue signature can always
+  // be inferred from the return type of the parent function in MCInstLower.
+  Multivalue = 0xffff,
+};
+
+/// Instruction opcodes emitted via means other than CodeGen.
+static const unsigned Nop = 0x01;
+static const unsigned End = 0x0b;
+
+wasm::ValType toValType(const MVT &Ty);
+
+/// Return the default p2align value for a load or store with the given opcode.
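+/// For example (per the cases below), LOAD8_U_I32 accesses a single byte, so
+/// its default p2align is 0; LOAD_I32 accesses four bytes (p2align 2); and
+/// LOAD_V128 accesses sixteen bytes (p2align 4).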
+inline unsigned GetDefaultP2AlignAny(unsigned Opc) { + switch (Opc) { + case WebAssembly::LOAD8_S_I32: + case WebAssembly::LOAD8_S_I32_S: + case WebAssembly::LOAD8_U_I32: + case WebAssembly::LOAD8_U_I32_S: + case WebAssembly::LOAD8_S_I64: + case WebAssembly::LOAD8_S_I64_S: + case WebAssembly::LOAD8_U_I64: + case WebAssembly::LOAD8_U_I64_S: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD8_U_I32_S: + case WebAssembly::ATOMIC_LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD8_U_I64_S: + case WebAssembly::STORE8_I32: + case WebAssembly::STORE8_I32_S: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE8_I64_S: + case WebAssembly::ATOMIC_STORE8_I32: + case WebAssembly::ATOMIC_STORE8_I32_S: + case WebAssembly::ATOMIC_STORE8_I64: + case WebAssembly::ATOMIC_STORE8_I64_S: + case WebAssembly::ATOMIC_RMW8_U_ADD_I32: + case WebAssembly::ATOMIC_RMW8_U_ADD_I32_S: + case WebAssembly::ATOMIC_RMW8_U_ADD_I64: + case WebAssembly::ATOMIC_RMW8_U_ADD_I64_S: + case WebAssembly::ATOMIC_RMW8_U_SUB_I32: + case WebAssembly::ATOMIC_RMW8_U_SUB_I32_S: + case WebAssembly::ATOMIC_RMW8_U_SUB_I64: + case WebAssembly::ATOMIC_RMW8_U_SUB_I64_S: + case WebAssembly::ATOMIC_RMW8_U_AND_I32: + case WebAssembly::ATOMIC_RMW8_U_AND_I32_S: + case WebAssembly::ATOMIC_RMW8_U_AND_I64: + case WebAssembly::ATOMIC_RMW8_U_AND_I64_S: + case WebAssembly::ATOMIC_RMW8_U_OR_I32: + case WebAssembly::ATOMIC_RMW8_U_OR_I32_S: + case WebAssembly::ATOMIC_RMW8_U_OR_I64: + case WebAssembly::ATOMIC_RMW8_U_OR_I64_S: + case WebAssembly::ATOMIC_RMW8_U_XOR_I32: + case WebAssembly::ATOMIC_RMW8_U_XOR_I32_S: + case WebAssembly::ATOMIC_RMW8_U_XOR_I64: + case WebAssembly::ATOMIC_RMW8_U_XOR_I64_S: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v8x16: + case WebAssembly::LOAD_SPLAT_v8x16_S: + return 0; + case WebAssembly::LOAD16_S_I32: + case WebAssembly::LOAD16_S_I32_S: + case WebAssembly::LOAD16_U_I32: + case WebAssembly::LOAD16_U_I32_S: + case WebAssembly::LOAD16_S_I64: + case WebAssembly::LOAD16_S_I64_S: + case WebAssembly::LOAD16_U_I64: + case WebAssembly::LOAD16_U_I64_S: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I32_S: + case WebAssembly::ATOMIC_LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I64_S: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE16_I32_S: + case WebAssembly::STORE16_I64: + case WebAssembly::STORE16_I64_S: + case WebAssembly::ATOMIC_STORE16_I32: + case WebAssembly::ATOMIC_STORE16_I32_S: + case WebAssembly::ATOMIC_STORE16_I64: + case WebAssembly::ATOMIC_STORE16_I64_S: + case WebAssembly::ATOMIC_RMW16_U_ADD_I32: + case WebAssembly::ATOMIC_RMW16_U_ADD_I32_S: + case WebAssembly::ATOMIC_RMW16_U_ADD_I64: + case WebAssembly::ATOMIC_RMW16_U_ADD_I64_S: + case WebAssembly::ATOMIC_RMW16_U_SUB_I32: + case WebAssembly::ATOMIC_RMW16_U_SUB_I32_S: + case WebAssembly::ATOMIC_RMW16_U_SUB_I64: + case WebAssembly::ATOMIC_RMW16_U_SUB_I64_S: + case WebAssembly::ATOMIC_RMW16_U_AND_I32: + case WebAssembly::ATOMIC_RMW16_U_AND_I32_S: + case WebAssembly::ATOMIC_RMW16_U_AND_I64: + case WebAssembly::ATOMIC_RMW16_U_AND_I64_S: + case WebAssembly::ATOMIC_RMW16_U_OR_I32: + case WebAssembly::ATOMIC_RMW16_U_OR_I32_S: + case 
WebAssembly::ATOMIC_RMW16_U_OR_I64: + case WebAssembly::ATOMIC_RMW16_U_OR_I64_S: + case WebAssembly::ATOMIC_RMW16_U_XOR_I32: + case WebAssembly::ATOMIC_RMW16_U_XOR_I32_S: + case WebAssembly::ATOMIC_RMW16_U_XOR_I64: + case WebAssembly::ATOMIC_RMW16_U_XOR_I64_S: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v16x8: + case WebAssembly::LOAD_SPLAT_v16x8_S: + return 1; + case WebAssembly::LOAD_I32: + case WebAssembly::LOAD_I32_S: + case WebAssembly::LOAD_F32: + case WebAssembly::LOAD_F32_S: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I32_S: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F32_S: + case WebAssembly::LOAD32_S_I64: + case WebAssembly::LOAD32_S_I64_S: + case WebAssembly::LOAD32_U_I64: + case WebAssembly::LOAD32_U_I64_S: + case WebAssembly::STORE32_I64: + case WebAssembly::STORE32_I64_S: + case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD_I32_S: + case WebAssembly::ATOMIC_LOAD32_U_I64: + case WebAssembly::ATOMIC_LOAD32_U_I64_S: + case WebAssembly::ATOMIC_STORE_I32: + case WebAssembly::ATOMIC_STORE_I32_S: + case WebAssembly::ATOMIC_STORE32_I64: + case WebAssembly::ATOMIC_STORE32_I64_S: + case WebAssembly::ATOMIC_RMW_ADD_I32: + case WebAssembly::ATOMIC_RMW_ADD_I32_S: + case WebAssembly::ATOMIC_RMW32_U_ADD_I64: + case WebAssembly::ATOMIC_RMW32_U_ADD_I64_S: + case WebAssembly::ATOMIC_RMW_SUB_I32: + case WebAssembly::ATOMIC_RMW_SUB_I32_S: + case WebAssembly::ATOMIC_RMW32_U_SUB_I64: + case WebAssembly::ATOMIC_RMW32_U_SUB_I64_S: + case WebAssembly::ATOMIC_RMW_AND_I32: + case WebAssembly::ATOMIC_RMW_AND_I32_S: + case WebAssembly::ATOMIC_RMW32_U_AND_I64: + case WebAssembly::ATOMIC_RMW32_U_AND_I64_S: + case WebAssembly::ATOMIC_RMW_OR_I32: + case WebAssembly::ATOMIC_RMW_OR_I32_S: + case WebAssembly::ATOMIC_RMW32_U_OR_I64: + case WebAssembly::ATOMIC_RMW32_U_OR_I64_S: + case WebAssembly::ATOMIC_RMW_XOR_I32: + case WebAssembly::ATOMIC_RMW_XOR_I32_S: + case WebAssembly::ATOMIC_RMW32_U_XOR_I64: + case WebAssembly::ATOMIC_RMW32_U_XOR_I64_S: + case WebAssembly::ATOMIC_RMW_XCHG_I32: + case WebAssembly::ATOMIC_RMW_XCHG_I32_S: + case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S: + case WebAssembly::ATOMIC_NOTIFY: + case WebAssembly::ATOMIC_NOTIFY_S: + case WebAssembly::ATOMIC_WAIT_I32: + case WebAssembly::ATOMIC_WAIT_I32_S: + case WebAssembly::LOAD_SPLAT_v32x4: + case WebAssembly::LOAD_SPLAT_v32x4_S: + return 2; + case WebAssembly::LOAD_I64: + case WebAssembly::LOAD_I64_S: + case WebAssembly::LOAD_F64: + case WebAssembly::LOAD_F64_S: + case WebAssembly::STORE_I64: + case WebAssembly::STORE_I64_S: + case WebAssembly::STORE_F64: + case WebAssembly::STORE_F64_S: + case WebAssembly::ATOMIC_LOAD_I64: + case WebAssembly::ATOMIC_LOAD_I64_S: + case WebAssembly::ATOMIC_STORE_I64: + case WebAssembly::ATOMIC_STORE_I64_S: + case WebAssembly::ATOMIC_RMW_ADD_I64: + case WebAssembly::ATOMIC_RMW_ADD_I64_S: + case WebAssembly::ATOMIC_RMW_SUB_I64: + case WebAssembly::ATOMIC_RMW_SUB_I64_S: + case 
WebAssembly::ATOMIC_RMW_AND_I64: + case WebAssembly::ATOMIC_RMW_AND_I64_S: + case WebAssembly::ATOMIC_RMW_OR_I64: + case WebAssembly::ATOMIC_RMW_OR_I64_S: + case WebAssembly::ATOMIC_RMW_XOR_I64: + case WebAssembly::ATOMIC_RMW_XOR_I64_S: + case WebAssembly::ATOMIC_RMW_XCHG_I64: + case WebAssembly::ATOMIC_RMW_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S: + case WebAssembly::ATOMIC_WAIT_I64: + case WebAssembly::ATOMIC_WAIT_I64_S: + case WebAssembly::LOAD_SPLAT_v64x2: + case WebAssembly::LOAD_SPLAT_v64x2_S: + case WebAssembly::LOAD_EXTEND_S_v8i16: + case WebAssembly::LOAD_EXTEND_S_v8i16_S: + case WebAssembly::LOAD_EXTEND_U_v8i16: + case WebAssembly::LOAD_EXTEND_U_v8i16_S: + case WebAssembly::LOAD_EXTEND_S_v4i32: + case WebAssembly::LOAD_EXTEND_S_v4i32_S: + case WebAssembly::LOAD_EXTEND_U_v4i32: + case WebAssembly::LOAD_EXTEND_U_v4i32_S: + case WebAssembly::LOAD_EXTEND_S_v2i64: + case WebAssembly::LOAD_EXTEND_S_v2i64_S: + case WebAssembly::LOAD_EXTEND_U_v2i64: + case WebAssembly::LOAD_EXTEND_U_v2i64_S: + return 3; + case WebAssembly::LOAD_V128: + case WebAssembly::LOAD_V128_S: + case WebAssembly::STORE_V128: + case WebAssembly::STORE_V128_S: + return 4; + default: + return -1; + } +} + +inline unsigned GetDefaultP2Align(unsigned Opc) { + auto Align = GetDefaultP2AlignAny(Opc); + if (Align == -1U) { + llvm_unreachable("Only loads and stores have p2align values"); + } + return Align; +} + +inline bool isArgument(unsigned Opc) { + switch (Opc) { + case WebAssembly::ARGUMENT_i32: + case WebAssembly::ARGUMENT_i32_S: + case WebAssembly::ARGUMENT_i64: + case WebAssembly::ARGUMENT_i64_S: + case WebAssembly::ARGUMENT_f32: + case WebAssembly::ARGUMENT_f32_S: + case WebAssembly::ARGUMENT_f64: + case WebAssembly::ARGUMENT_f64_S: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v16i8_S: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v8i16_S: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4i32_S: + case WebAssembly::ARGUMENT_v2i64: + case WebAssembly::ARGUMENT_v2i64_S: + case WebAssembly::ARGUMENT_v4f32: + case WebAssembly::ARGUMENT_v4f32_S: + case WebAssembly::ARGUMENT_v2f64: + case WebAssembly::ARGUMENT_v2f64_S: + case WebAssembly::ARGUMENT_exnref: + case WebAssembly::ARGUMENT_exnref_S: + return true; + default: + return false; + } +} + +inline bool isCopy(unsigned Opc) { + switch (Opc) { + case WebAssembly::COPY_I32: + case WebAssembly::COPY_I32_S: + case WebAssembly::COPY_I64: + case WebAssembly::COPY_I64_S: + case WebAssembly::COPY_F32: + case WebAssembly::COPY_F32_S: + case WebAssembly::COPY_F64: + case WebAssembly::COPY_F64_S: + case WebAssembly::COPY_V128: + case WebAssembly::COPY_V128_S: + case WebAssembly::COPY_EXNREF: + case WebAssembly::COPY_EXNREF_S: + return true; + default: + return false; + } +} + +inline bool isTee(unsigned Opc) { + switch (Opc) { + case WebAssembly::TEE_I32: + case WebAssembly::TEE_I32_S: + case WebAssembly::TEE_I64: + case WebAssembly::TEE_I64_S: + case WebAssembly::TEE_F32: + case WebAssembly::TEE_F32_S: + case WebAssembly::TEE_F64: + case WebAssembly::TEE_F64_S: + case WebAssembly::TEE_V128: + case WebAssembly::TEE_V128_S: + case WebAssembly::TEE_EXNREF: + case WebAssembly::TEE_EXNREF_S: + return true; + default: + return false; + } +} + +inline bool isCallDirect(unsigned Opc) { + switch (Opc) { + case WebAssembly::CALL_VOID: + case WebAssembly::CALL_VOID_S: + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i32_S: + case WebAssembly::CALL_i64: + case 
WebAssembly::CALL_i64_S: + case WebAssembly::CALL_f32: + case WebAssembly::CALL_f32_S: + case WebAssembly::CALL_f64: + case WebAssembly::CALL_f64_S: + case WebAssembly::CALL_v16i8: + case WebAssembly::CALL_v16i8_S: + case WebAssembly::CALL_v8i16: + case WebAssembly::CALL_v8i16_S: + case WebAssembly::CALL_v4i32: + case WebAssembly::CALL_v4i32_S: + case WebAssembly::CALL_v2i64: + case WebAssembly::CALL_v2i64_S: + case WebAssembly::CALL_v4f32: + case WebAssembly::CALL_v4f32_S: + case WebAssembly::CALL_v2f64: + case WebAssembly::CALL_v2f64_S: + case WebAssembly::CALL_exnref: + case WebAssembly::CALL_exnref_S: + case WebAssembly::RET_CALL: + case WebAssembly::RET_CALL_S: + return true; + default: + return false; + } +} + +inline bool isCallIndirect(unsigned Opc) { + switch (Opc) { + case WebAssembly::CALL_INDIRECT_VOID: + case WebAssembly::CALL_INDIRECT_VOID_S: + case WebAssembly::CALL_INDIRECT_i32: + case WebAssembly::CALL_INDIRECT_i32_S: + case WebAssembly::CALL_INDIRECT_i64: + case WebAssembly::CALL_INDIRECT_i64_S: + case WebAssembly::CALL_INDIRECT_f32: + case WebAssembly::CALL_INDIRECT_f32_S: + case WebAssembly::CALL_INDIRECT_f64: + case WebAssembly::CALL_INDIRECT_f64_S: + case WebAssembly::CALL_INDIRECT_v16i8: + case WebAssembly::CALL_INDIRECT_v16i8_S: + case WebAssembly::CALL_INDIRECT_v8i16: + case WebAssembly::CALL_INDIRECT_v8i16_S: + case WebAssembly::CALL_INDIRECT_v4i32: + case WebAssembly::CALL_INDIRECT_v4i32_S: + case WebAssembly::CALL_INDIRECT_v2i64: + case WebAssembly::CALL_INDIRECT_v2i64_S: + case WebAssembly::CALL_INDIRECT_v4f32: + case WebAssembly::CALL_INDIRECT_v4f32_S: + case WebAssembly::CALL_INDIRECT_v2f64: + case WebAssembly::CALL_INDIRECT_v2f64_S: + case WebAssembly::CALL_INDIRECT_exnref: + case WebAssembly::CALL_INDIRECT_exnref_S: + case WebAssembly::RET_CALL_INDIRECT: + case WebAssembly::RET_CALL_INDIRECT_S: + return true; + default: + return false; + } +} + +/// Returns the operand number of a callee, assuming the argument is a call +/// instruction. 
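+/// For example, CALL_VOID produces no result, so its callee is operand 0,
+/// while CALL_i32 defines its result as operand 0 and carries the callee in
+/// operand 1; the switch below encodes exactly this split.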
+inline unsigned getCalleeOpNo(unsigned Opc) { + switch (Opc) { + case WebAssembly::CALL_VOID: + case WebAssembly::CALL_VOID_S: + case WebAssembly::CALL_INDIRECT_VOID: + case WebAssembly::CALL_INDIRECT_VOID_S: + case WebAssembly::RET_CALL: + case WebAssembly::RET_CALL_S: + case WebAssembly::RET_CALL_INDIRECT: + case WebAssembly::RET_CALL_INDIRECT_S: + return 0; + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i32_S: + case WebAssembly::CALL_i64: + case WebAssembly::CALL_i64_S: + case WebAssembly::CALL_f32: + case WebAssembly::CALL_f32_S: + case WebAssembly::CALL_f64: + case WebAssembly::CALL_f64_S: + case WebAssembly::CALL_v16i8: + case WebAssembly::CALL_v16i8_S: + case WebAssembly::CALL_v8i16: + case WebAssembly::CALL_v8i16_S: + case WebAssembly::CALL_v4i32: + case WebAssembly::CALL_v4i32_S: + case WebAssembly::CALL_v2i64: + case WebAssembly::CALL_v2i64_S: + case WebAssembly::CALL_v4f32: + case WebAssembly::CALL_v4f32_S: + case WebAssembly::CALL_v2f64: + case WebAssembly::CALL_v2f64_S: + case WebAssembly::CALL_exnref: + case WebAssembly::CALL_exnref_S: + case WebAssembly::CALL_INDIRECT_i32: + case WebAssembly::CALL_INDIRECT_i32_S: + case WebAssembly::CALL_INDIRECT_i64: + case WebAssembly::CALL_INDIRECT_i64_S: + case WebAssembly::CALL_INDIRECT_f32: + case WebAssembly::CALL_INDIRECT_f32_S: + case WebAssembly::CALL_INDIRECT_f64: + case WebAssembly::CALL_INDIRECT_f64_S: + case WebAssembly::CALL_INDIRECT_v16i8: + case WebAssembly::CALL_INDIRECT_v16i8_S: + case WebAssembly::CALL_INDIRECT_v8i16: + case WebAssembly::CALL_INDIRECT_v8i16_S: + case WebAssembly::CALL_INDIRECT_v4i32: + case WebAssembly::CALL_INDIRECT_v4i32_S: + case WebAssembly::CALL_INDIRECT_v2i64: + case WebAssembly::CALL_INDIRECT_v2i64_S: + case WebAssembly::CALL_INDIRECT_v4f32: + case WebAssembly::CALL_INDIRECT_v4f32_S: + case WebAssembly::CALL_INDIRECT_v2f64: + case WebAssembly::CALL_INDIRECT_v2f64_S: + case WebAssembly::CALL_INDIRECT_exnref: + case WebAssembly::CALL_INDIRECT_exnref_S: + return 1; + default: + llvm_unreachable("Not a call instruction"); + } +} + +inline bool isMarker(unsigned Opc) { + switch (Opc) { + case WebAssembly::BLOCK: + case WebAssembly::BLOCK_S: + case WebAssembly::END_BLOCK: + case WebAssembly::END_BLOCK_S: + case WebAssembly::LOOP: + case WebAssembly::LOOP_S: + case WebAssembly::END_LOOP: + case WebAssembly::END_LOOP_S: + case WebAssembly::TRY: + case WebAssembly::TRY_S: + case WebAssembly::END_TRY: + case WebAssembly::END_TRY_S: + return true; + default: + return false; + } +} + +} // end namespace WebAssembly +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp new file mode 100644 index 000000000000..40926201931a --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -0,0 +1,123 @@ +//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines WebAssembly-specific target streamer classes. +/// These are for implementing support for target-specific assembly directives. 
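+///
+/// As a worked example of the binary .local encoding implemented below,
+/// locals (i32, i32, i64) are run-length grouped into (2 x i32, 1 x i64) and
+/// emitted as the group count 2, then the pair (count 2, valtype i32), then
+/// the pair (count 1, valtype i64), with the counts in ULEB128.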
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" +#include "MCTargetDesc/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) {} + +void WebAssemblyTargetStreamer::emitValueType(wasm::ValType Type) { + Streamer.EmitIntValue(uint8_t(Type), 1); +} + +WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer( + MCStreamer &S, formatted_raw_ostream &OS) + : WebAssemblyTargetStreamer(S), OS(OS) {} + +WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S) + : WebAssemblyTargetStreamer(S) {} + +static void printTypes(formatted_raw_ostream &OS, + ArrayRef<wasm::ValType> Types) { + bool First = true; + for (auto Type : Types) { + if (First) + First = false; + else + OS << ", "; + OS << WebAssembly::typeToString(Type); + } + OS << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) { + if (!Types.empty()) { + OS << "\t.local \t"; + printTypes(OS, Types); + } +} + +void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } + +void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) { + assert(Sym->isFunction()); + OS << "\t.functype\t" << Sym->getName() << " "; + OS << WebAssembly::signatureToString(Sym->getSignature()); + OS << "\n"; +} + +void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) { + assert(Sym->isGlobal()); + OS << "\t.globaltype\t" << Sym->getName() << ", " + << WebAssembly::typeToString( + static_cast<wasm::ValType>(Sym->getGlobalType().Type)) + << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { + assert(Sym->isEvent()); + OS << "\t.eventtype\t" << Sym->getName() << " "; + OS << WebAssembly::typeListToString(Sym->getSignature()->Params); + OS << "\n"; +} + +void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym, + StringRef ImportModule) { + OS << "\t.import_module\t" << Sym->getName() << ", " + << ImportModule << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym, + StringRef ImportName) { + OS << "\t.import_name\t" << Sym->getName() << ", " + << ImportName << '\n'; +} + +void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) { + OS << "\t.indidx \t" << *Value << '\n'; +} + +void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) { + SmallVector<std::pair<wasm::ValType, uint32_t>, 4> Grouped; + for (auto Type : Types) { + if (Grouped.empty() || Grouped.back().first != Type) + Grouped.push_back(std::make_pair(Type, 1)); + else + ++Grouped.back().second; + } + + Streamer.EmitULEB128IntValue(Grouped.size()); + for (auto Pair : Grouped) { + Streamer.EmitULEB128IntValue(Pair.second); + emitValueType(Pair.first); + } +} + +void WebAssemblyTargetWasmStreamer::emitEndFunc() { + llvm_unreachable(".end_func is not needed for direct wasm output"); +} + +void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) { + llvm_unreachable(".indidx encoding not yet implemented"); +} diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
new file mode 100644
index 000000000000..0164f8e572ef
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -0,0 +1,108 @@
+//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares WebAssembly-specific target streamer classes.
+/// These are for implementing support for target-specific assembly directives.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H

+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/MachineValueType.h"
+
+namespace llvm {
+
+class MCWasmStreamer;
+class MCSymbolWasm;
+
+/// WebAssembly-specific streamer interface, to implement support for
+/// WebAssembly-specific assembly directives.
+class WebAssemblyTargetStreamer : public MCTargetStreamer {
+public:
+  explicit WebAssemblyTargetStreamer(MCStreamer &S);
+
+  /// .local
+  virtual void emitLocal(ArrayRef<wasm::ValType> Types) = 0;
+  /// .endfunc
+  virtual void emitEndFunc() = 0;
+  /// .functype
+  virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0;
+  /// .indidx
+  virtual void emitIndIdx(const MCExpr *Value) = 0;
+  /// .globaltype
+  virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0;
+  /// .eventtype
+  virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
+  /// .import_module
+  virtual void emitImportModule(const MCSymbolWasm *Sym,
+                                StringRef ImportModule) = 0;
+  /// .import_name
+  virtual void emitImportName(const MCSymbolWasm *Sym,
+                              StringRef ImportName) = 0;
+
+protected:
+  void emitValueType(wasm::ValType Type);
+};
+
+/// This part is for ASCII assembly output
+class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
+  formatted_raw_ostream &OS;
+
+public:
+  WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+  void emitLocal(ArrayRef<wasm::ValType> Types) override;
+  void emitEndFunc() override;
+  void emitFunctionType(const MCSymbolWasm *Sym) override;
+  void emitIndIdx(const MCExpr *Value) override;
+  void emitGlobalType(const MCSymbolWasm *Sym) override;
+  void emitEventType(const MCSymbolWasm *Sym) override;
+  void emitImportModule(const MCSymbolWasm *Sym,
+                        StringRef ImportModule) override;
+  void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
+};
+
+/// This part is for Wasm object output
+class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
+public:
+  explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
+
+  void emitLocal(ArrayRef<wasm::ValType> Types) override;
+  void emitEndFunc() override;
+  void emitFunctionType(const MCSymbolWasm *Sym) override {}
+  void emitIndIdx(const MCExpr *Value) override;
+  void emitGlobalType(const MCSymbolWasm *Sym) override {}
+  void emitEventType(const MCSymbolWasm *Sym) override {}
+  void emitImportModule(const MCSymbolWasm *Sym,
+                        StringRef ImportModule) override {}
+  void emitImportName(const MCSymbolWasm *Sym,
+                      StringRef ImportName) override {}
+};
+
+/// This part is
for null output +class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer { +public: + explicit WebAssemblyTargetNullStreamer(MCStreamer &S) + : WebAssemblyTargetStreamer(S) {} + + void emitLocal(ArrayRef<wasm::ValType>) override {} + void emitEndFunc() override {} + void emitFunctionType(const MCSymbolWasm *) override {} + void emitIndIdx(const MCExpr *) override {} + void emitGlobalType(const MCSymbolWasm *) override {} + void emitEventType(const MCSymbolWasm *) override {} + void emitImportModule(const MCSymbolWasm *, StringRef) override {} + void emitImportName(const MCSymbolWasm *, StringRef) override {} +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp new file mode 100644 index 000000000000..e7a599e3e175 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -0,0 +1,122 @@ +//===-- WebAssemblyWasmObjectWriter.cpp - WebAssembly Wasm Writer ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file handles Wasm-specific object emission, converting LLVM's +/// internal fixups into the appropriate relocations. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWasmObjectWriter.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter { +public: + explicit WebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); + +private: + unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const override; +}; +} // end anonymous namespace + +WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit, + bool IsEmscripten) + : MCWasmObjectTargetWriter(Is64Bit, IsEmscripten) {} + +static const MCSection *getFixupSection(const MCExpr *Expr) { + if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr)) { + if (SyExp->getSymbol().isInSection()) + return &SyExp->getSymbol().getSection(); + return nullptr; + } + + if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr)) { + auto SectionLHS = getFixupSection(BinOp->getLHS()); + auto SectionRHS = getFixupSection(BinOp->getRHS()); + return SectionLHS == SectionRHS ? 
nullptr : SectionLHS; + } + + if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr)) + return getFixupSection(UnOp->getSubExpr()); + + return nullptr; +} + +unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup) const { + const MCSymbolRefExpr *RefA = Target.getSymA(); + assert(RefA); + auto& SymA = cast<MCSymbolWasm>(RefA->getSymbol()); + + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); + + switch (Modifier) { + case MCSymbolRefExpr::VK_GOT: + return wasm::R_WASM_GLOBAL_INDEX_LEB; + case MCSymbolRefExpr::VK_WASM_TBREL: + assert(SymA.isFunction()); + return wasm::R_WASM_TABLE_INDEX_REL_SLEB; + case MCSymbolRefExpr::VK_WASM_MBREL: + assert(SymA.isData()); + return wasm::R_WASM_MEMORY_ADDR_REL_SLEB; + case MCSymbolRefExpr::VK_WASM_TYPEINDEX: + return wasm::R_WASM_TYPE_INDEX_LEB; + default: + break; + } + + switch (unsigned(Fixup.getKind())) { + case WebAssembly::fixup_sleb128_i32: + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_SLEB; + return wasm::R_WASM_MEMORY_ADDR_SLEB; + case WebAssembly::fixup_sleb128_i64: + llvm_unreachable("fixup_sleb128_i64 not implemented yet"); + case WebAssembly::fixup_uleb128_i32: + if (SymA.isGlobal()) + return wasm::R_WASM_GLOBAL_INDEX_LEB; + if (SymA.isFunction()) + return wasm::R_WASM_FUNCTION_INDEX_LEB; + if (SymA.isEvent()) + return wasm::R_WASM_EVENT_INDEX_LEB; + return wasm::R_WASM_MEMORY_ADDR_LEB; + case FK_Data_4: + if (SymA.isFunction()) + return wasm::R_WASM_TABLE_INDEX_I32; + if (auto Section = static_cast<const MCSectionWasm *>( + getFixupSection(Fixup.getValue()))) { + if (Section->getKind().isText()) + return wasm::R_WASM_FUNCTION_OFFSET_I32; + else if (!Section->isWasmData()) + return wasm::R_WASM_SECTION_OFFSET_I32; + } + return wasm::R_WASM_MEMORY_ADDR_I32; + default: + llvm_unreachable("unimplemented fixup kind"); + } +} + +std::unique_ptr<MCObjectTargetWriter> +llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten) { + return std::make_unique<WebAssemblyWasmObjectWriter>(Is64Bit, IsEmscripten); +} diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt new file mode 100644 index 000000000000..ef3f5aaf7d33 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/README.txt @@ -0,0 +1,196 @@ +//===-- README.txt - Notes for WebAssembly code gen -----------------------===// + +This WebAssembly backend is presently under development. + +The most notable feature which is not yet stable is the ".o" file format. +".o" file support is needed for many common ways of using LLVM, such as +using it through "clang -c", so this backend is not yet considered widely +usable. However, this backend is usable within some language toolchain +packages: + +Emscripten provides a C/C++ compilation environment that includes standard +libraries, tools, and packaging for producing WebAssembly applications that +can run in browsers and other environments. For more information, see the +Emscripten documentation in general, and this page in particular: + + * https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend + +Rust provides WebAssembly support integrated into Cargo. 
There are two
+main options:
+ - wasm32-unknown-unknown, which provides a relatively minimal environment
+   that has an emphasis on being "native"
+ - wasm32-unknown-emscripten, which uses Emscripten internally and
+   provides standard C/C++ libraries, filesystem emulation, GL and SDL
+   bindings
+For more information, see:
+ * https://www.hellorust.com/
+
+
+This backend does not yet support debug info. Full DWARF support needs a
+design for how DWARF should be represented in WebAssembly. Sourcemap support
+has an existing design and some corresponding browser implementations, so it
+just needs implementing in LLVM.
+
+Work-in-progress documentation for the ".o" file format is here:
+
+  * https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+
+A corresponding linker implementation is also under development:
+
+  * https://lld.llvm.org/WebAssembly.html
+
+For more information on WebAssembly itself, see the home page:
+  * https://webassembly.github.io/
+
+The following documents contain some information on the semantics and binary
+encoding of WebAssembly itself:
+  * https://github.com/WebAssembly/design/blob/master/Semantics.md
+  * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+
+The backend is built, tested, and archived on the following waterfall:
+  https://wasm-stat.us
+
+The backend's bringup is done in part by using the GCC torture test suite, since
+it doesn't require C library support. Current known failures are in
+known_gcc_test_failures.txt; all other tests should pass. The waterfall will
+turn red if not. Once most of these pass, further testing will use LLVM's own
+test suite. The tests can be run locally using:
+  https://github.com/WebAssembly/waterfall/blob/master/src/compile_torture_tests.py
+
+Some notes on ways that the generated code could be improved follow:
+
+//===---------------------------------------------------------------------===//
+
+Br, br_if, and br_table instructions can support having a value on the value
+stack across the jump (sometimes). We should (a) model this, and (b) extend
+the stackifier to utilize it.
+
+//===---------------------------------------------------------------------===//
+
+The min/max instructions aren't exactly a<b?a:b because of NaN and negative zero
+behavior. The ARM target has the same kind of min/max instructions and has
+implemented optimizations for them; we should do similar optimizations for
+WebAssembly.
+
+//===---------------------------------------------------------------------===//
+
+AArch64 runs SeparateConstOffsetFromGEPPass, followed by EarlyCSE and LICM.
+Would these be useful to run for WebAssembly too? Also, it has an option to
+run SimplifyCFG after running the AtomicExpand pass. Would this be useful for
+us too?
+
+//===---------------------------------------------------------------------===//
+
+Register stackification uses the VALUE_STACK physical register to impose
+ordering dependencies on instructions with stack operands. This is pessimistic;
+we should consider alternate ways to model stack dependencies.
+
+//===---------------------------------------------------------------------===//
+
+Lots of things could be done in WebAssemblyTargetTransformInfo.cpp. Similarly,
+there are numerous optimization-related hooks that can be overridden in
+WebAssemblyTargetLowering; one illustrative sketch follows.
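+
+As a purely illustrative sketch of such a hook (assuming the TargetLowering
+interface of this LLVM version exposes isCheapToSpeculateCttz(), and not a
+statement about what the backend actually overrides):
+
+  // Hypothetical override in WebAssemblyISelLowering.cpp: wasm has
+  // single-instruction i32.ctz / i64.ctz, so a count-trailing-zeros can be
+  // speculated cheaply without guarding against a zero input.
+  bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
+    return true;
+  }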
+
+//===---------------------------------------------------------------------===//
+
+Instead of the OptimizeReturned pass, consider preserving the "returned"
+attribute through to MachineInstrs and extending the MemIntrinsicResults pass
+to do this optimization on calls too. That would also let the
+WebAssemblyPeephole pass clean up dead defs for such calls, as it does for
+stores.
+
+//===---------------------------------------------------------------------===//
+
+Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch,
+optimizeLoadInstr, and/or getMachineCombinerPatterns.
+
+//===---------------------------------------------------------------------===//
+
+Find a clean way to fix the problem which leads to the Shrink Wrapping pass
+being run after the WebAssembly PEI pass.
+
+//===---------------------------------------------------------------------===//
+
+When setting multiple local variables to the same constant, we currently get
+code like this:
+
+  i32.const $4=, 0
+  i32.const $3=, 0
+
+It could be done with a smaller encoding like this:
+
+  i32.const $push5=, 0
+  local.tee $push6=, $4=, $pop5
+  local.copy $3=, $pop6
+
+//===---------------------------------------------------------------------===//
+
+WebAssembly registers are implicitly initialized to zero. Explicit zeroing is
+therefore often redundant and could be optimized away.
+
+//===---------------------------------------------------------------------===//
+
+Small indices may use smaller encodings than large indices.
+WebAssemblyRegColoring and/or WebAssemblyRegRenumbering should sort registers
+according to their usage frequency to maximize the usage of smaller encodings.
+
+//===---------------------------------------------------------------------===//
+
+Many cases of irreducible control flow could be transformed more optimally
+than via the transform in WebAssemblyFixIrreducibleControlFlow.cpp.
+
+It may also be worthwhile to do transforms before register coloring,
+particularly when duplicating code, to allow register coloring to be aware of
+the duplication.
+
+//===---------------------------------------------------------------------===//
+
+WebAssemblyRegStackify could use AliasAnalysis to reorder loads and stores more
+aggressively.
+
+//===---------------------------------------------------------------------===//
+
+WebAssemblyRegStackify is currently a greedy algorithm. This means that, for
+example, a binary operator will stackify with its user before its operands.
+However, if moving the binary operator to its user moves it to a place where
+its operands can't be moved to, it would be better to leave it in place, or
+perhaps move it up, so that it can stackify its operands. A binary operator
+has two operands and one result, so in such cases there could be a net win by
+preferring the operands.
+
+//===---------------------------------------------------------------------===//
+
+Instruction ordering has a significant influence on register stackification and
+coloring. Consider experimenting with the MachineScheduler (enable via
+enableMachineScheduler) and determine if it can be configured to schedule
+instructions advantageously for this purpose.
+
+//===---------------------------------------------------------------------===//
+
+WebAssemblyRegStackify currently assumes that the stack must be empty after
+an instruction with no return values; however, wasm doesn't actually require
+this.
WebAssemblyRegStackify could be extended, or possibly rewritten, to take +full advantage of what WebAssembly permits. + +//===---------------------------------------------------------------------===// + +Add support for mergeable sections in the Wasm writer, such as for strings and +floating-point constants. + +//===---------------------------------------------------------------------===// + +The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll +ends up with a local.tee in its prolog which has an unused result, requiring +an extra drop: + + global.get $push8=, 0 + local.tee $push9=, 1, $pop8 + drop $pop9 + [...] + +The prologue code initially thinks it needs an FP register, but later it +turns out to be unneeded, so one could either approach this by being more +clever about not inserting code for an FP in the first place, or optimizing +away the copy later. + +//===---------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp new file mode 100644 index 000000000000..e4afe2bb2830 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp @@ -0,0 +1,34 @@ +//===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file registers the WebAssembly target. +/// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/WebAssemblyTargetInfo.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-target-info" + +Target &llvm::getTheWebAssemblyTarget32() { + static Target TheWebAssemblyTarget32; + return TheWebAssemblyTarget32; +} +Target &llvm::getTheWebAssemblyTarget64() { + static Target TheWebAssemblyTarget64; + return TheWebAssemblyTarget64; +} + +extern "C" void LLVMInitializeWebAssemblyTargetInfo() { + RegisterTarget<Triple::wasm32> X(getTheWebAssemblyTarget32(), "wasm32", + "WebAssembly 32-bit", "WebAssembly"); + RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64", + "WebAssembly 64-bit", "WebAssembly"); +} diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h new file mode 100644 index 000000000000..a7427f78c72c --- /dev/null +++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h @@ -0,0 +1,26 @@ +//===-- WebAssemblyTargetInfo.h - WebAssembly Target Impl -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file registers the WebAssembly target. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheWebAssemblyTarget32(); +Target &getTheWebAssemblyTarget64(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h new file mode 100644 index 000000000000..fcbd0a5082ff --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -0,0 +1,84 @@ +//===-- WebAssembly.h - Top-level interface for WebAssembly ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the entry points for global functions defined in +/// the LLVM WebAssembly back-end. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H + +#include "llvm/PassRegistry.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { + +class WebAssemblyTargetMachine; +class ModulePass; +class FunctionPass; + +// LLVM IR passes. +ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj); +ModulePass *createWebAssemblyLowerGlobalDtors(); +ModulePass *createWebAssemblyAddMissingPrototypes(); +ModulePass *createWebAssemblyFixFunctionBitcasts(); +FunctionPass *createWebAssemblyOptimizeReturned(); + +// ISel and immediate followup passes. +FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, + CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyArgumentMove(); +FunctionPass *createWebAssemblySetP2AlignOperands(); + +// Late passes. +FunctionPass *createWebAssemblyReplacePhysRegs(); +FunctionPass *createWebAssemblyPrepareForLiveIntervals(); +FunctionPass *createWebAssemblyOptimizeLiveIntervals(); +FunctionPass *createWebAssemblyMemIntrinsicResults(); +FunctionPass *createWebAssemblyRegStackify(); +FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); +FunctionPass *createWebAssemblyLateEHPrepare(); +FunctionPass *createWebAssemblyCFGSort(); +FunctionPass *createWebAssemblyCFGStackify(); +FunctionPass *createWebAssemblyExplicitLocals(); +FunctionPass *createWebAssemblyLowerBrUnless(); +FunctionPass *createWebAssemblyRegNumbering(); +FunctionPass *createWebAssemblyPeephole(); +FunctionPass *createWebAssemblyCallIndirectFixup(); + +// PassRegistry initialization declarations. 
+void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &); +void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); +void initializeLowerGlobalDtorsPass(PassRegistry &); +void initializeFixFunctionBitcastsPass(PassRegistry &); +void initializeOptimizeReturnedPass(PassRegistry &); +void initializeWebAssemblyArgumentMovePass(PassRegistry &); +void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &); +void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &); +void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &); +void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &); +void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); +void initializeWebAssemblyRegStackifyPass(PassRegistry &); +void initializeWebAssemblyRegColoringPass(PassRegistry &); +void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &); +void initializeWebAssemblyLateEHPreparePass(PassRegistry &); +void initializeWebAssemblyExceptionInfoPass(PassRegistry &); +void initializeWebAssemblyCFGSortPass(PassRegistry &); +void initializeWebAssemblyCFGStackifyPass(PassRegistry &); +void initializeWebAssemblyExplicitLocalsPass(PassRegistry &); +void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &); +void initializeWebAssemblyRegNumberingPass(PassRegistry &); +void initializeWebAssemblyPeepholePass(PassRegistry &); +void initializeWebAssemblyCallIndirectFixupPass(PassRegistry &); + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td new file mode 100644 index 000000000000..b0b8a9b996a3 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -0,0 +1,123 @@ +//- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is a target description file for the WebAssembly architecture, +/// which is also known as "wasm". +/// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// WebAssembly Subtarget features. 
+//===----------------------------------------------------------------------===// + +def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128", + "Enable 128-bit SIMD">; + +def FeatureUnimplementedSIMD128 : + SubtargetFeature<"unimplemented-simd128", + "SIMDLevel", "UnimplementedSIMD128", + "Enable 128-bit SIMD not yet implemented in engines", + [FeatureSIMD128]>; + +def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true", + "Enable Atomics">; + +def FeatureNontrappingFPToInt : + SubtargetFeature<"nontrapping-fptoint", + "HasNontrappingFPToInt", "true", + "Enable non-trapping float-to-int conversion operators">; + +def FeatureSignExt : + SubtargetFeature<"sign-ext", + "HasSignExt", "true", + "Enable sign extension operators">; + +def FeatureTailCall : + SubtargetFeature<"tail-call", + "HasTailCall", "true", + "Enable tail call instructions">; + +def FeatureExceptionHandling : + SubtargetFeature<"exception-handling", "HasExceptionHandling", "true", + "Enable Wasm exception handling">; + +def FeatureBulkMemory : + SubtargetFeature<"bulk-memory", "HasBulkMemory", "true", + "Enable bulk memory operations">; + +def FeatureMultivalue : + SubtargetFeature<"multivalue", + "HasMultivalue", "true", + "Enable multivalue blocks, instructions, and functions">; + +def FeatureMutableGlobals : + SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true", + "Enable mutable globals">; + +//===----------------------------------------------------------------------===// +// Architectures. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "WebAssemblyRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrInfo.td" + +def WebAssemblyInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// WebAssembly Processors supported. +//===----------------------------------------------------------------------===// + +// Minimal Viable Product. +def : ProcessorModel<"mvp", NoSchedModel, []>; + +// Generic processor: latest stable version. +def : ProcessorModel<"generic", NoSchedModel, []>; + +// Latest and greatest experimental version of WebAssembly. Bugs included! 
+def : ProcessorModel<"bleeding-edge", NoSchedModel, + [FeatureSIMD128, FeatureAtomics, + FeatureNontrappingFPToInt, FeatureSignExt, + FeatureMutableGlobals]>; + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def WebAssemblyAsmParser : AsmParser { + // The physical register names are not in the binary format or asm text + let ShouldEmitMatchRegisterName = 0; +} + +def WebAssemblyAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 0; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def WebAssembly : Target { + let InstructionSet = WebAssemblyInstrInfo; + let AssemblyParsers = [WebAssemblyAsmParser]; + let AssemblyWriters = [WebAssemblyAsmWriter]; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp new file mode 100644 index 000000000000..b7a701f15782 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp @@ -0,0 +1,144 @@ +//===-- WebAssemblyAddMissingPrototypes.cpp - Fix prototypeless functions -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Add prototypes to prototypes-less functions. +/// +/// WebAssembly has strict function prototype checking so we need functions +/// declarations to match the call sites. Clang treats prototype-less functions +/// as varargs (foo(...)) which happens to work on existing platforms but +/// doesn't under WebAssembly. This pass will find all the call sites of each +/// prototype-less function, ensure they agree, and then set the signature +/// on the function declaration accordingly. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-add-missing-prototypes"
+
+namespace {
+class WebAssemblyAddMissingPrototypes final : public ModulePass {
+  StringRef getPassName() const override {
+    return "Add prototypes to prototype-less functions";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  WebAssemblyAddMissingPrototypes() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char WebAssemblyAddMissingPrototypes::ID = 0;
+INITIALIZE_PASS(WebAssemblyAddMissingPrototypes, DEBUG_TYPE,
+                "Add prototypes to prototype-less functions", false, false)
+
+ModulePass *llvm::createWebAssemblyAddMissingPrototypes() {
+  return new WebAssemblyAddMissingPrototypes();
+}
+
+bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
+  LLVM_DEBUG(dbgs() << "********** Add Missing Prototypes **********\n");
+
+  std::vector<std::pair<Function *, Function *>> Replacements;
+
+  // Find all the prototype-less function declarations
+  for (Function &F : M) {
+    if (!F.isDeclaration() || !F.hasFnAttribute("no-prototype"))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName()
+                      << "\n");
+
+    // When clang emits prototype-less C functions it uses (...), i.e. a
+    // varargs function that takes no arguments (has no sentinel). When we see
+    // a no-prototype attribute we expect the function to have these
+    // properties.
+    if (!F.isVarArg())
+      report_fatal_error(
+          "Functions with 'no-prototype' attribute must take varargs: " +
+          F.getName());
+    unsigned NumParams = F.getFunctionType()->getNumParams();
+    if (NumParams != 0) {
+      if (!(NumParams == 1 && F.arg_begin()->hasStructRetAttr()))
+        report_fatal_error("Functions with 'no-prototype' attribute should "
+                           "not have params: " +
+                           F.getName());
+    }
+
+    // Create a function prototype based on the first call site (first bitcast)
+    // that we find.
+    FunctionType *NewType = nullptr;
+    for (Use &U : F.uses()) {
+      LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
+      LLVM_DEBUG(dbgs() << *U.getUser() << "\n");
+      if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
+        if (auto *DestType = dyn_cast<FunctionType>(
+                BC->getDestTy()->getPointerElementType())) {
+          if (!NewType) {
+            // Create a new function with the correct type
+            NewType = DestType;
+            LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
+          } else if (NewType != DestType) {
+            errs() << "warning: prototype-less function used with "
+                      "conflicting signatures: "
+                   << F.getName() << "\n";
+            LLVM_DEBUG(dbgs() << "  " << *DestType << "\n");
+            LLVM_DEBUG(dbgs() << "  " << *NewType << "\n");
+          }
+        }
+      }
+    }
+
+    if (!NewType) {
+      LLVM_DEBUG(
+          dbgs() << "could not derive a function prototype from usage: " +
+                        F.getName() + "\n");
+      // We could not derive a type for this function. In this case strip
+      // the isVarArg and make it a simple zero-arg function. This has a better
+      // chance of being correct. The current signature of (...)
is illegal in
+      // C since it doesn't have any arguments before the "...", so this at
+      // least makes it possible for this symbol to be resolved by the linker.
+      NewType = FunctionType::get(F.getFunctionType()->getReturnType(), false);
+    }
+
+    Function *NewF =
+        Function::Create(NewType, F.getLinkage(), F.getName() + ".fixed_sig");
+    NewF->setAttributes(F.getAttributes());
+    NewF->removeFnAttr("no-prototype");
+    Replacements.emplace_back(&F, NewF);
+  }
+
+  for (auto &Pair : Replacements) {
+    Function *OldF = Pair.first;
+    Function *NewF = Pair.second;
+    std::string Name = OldF->getName();
+    M.getFunctionList().push_back(NewF);
+    OldF->replaceAllUsesWith(
+        ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewF, OldF->getType()));
+    OldF->eraseFromParent();
+    NewF->setName(Name);
+  }
+
+  return !Replacements.empty();
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
new file mode 100644
index 000000000000..02f5cc6da77c
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -0,0 +1,97 @@
+//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file moves ARGUMENT instructions after ScheduleDAG scheduling.
+///
+/// Arguments are really live-in registers; however, since we use virtual
+/// registers and LLVM doesn't support live-in virtual registers, we're
+/// currently making do with ARGUMENT instructions which are placed at the top
+/// of the entry block. The trick is to get them to *stay* at the top of the
+/// entry block.
+///
+/// The ARGUMENTS physical register keeps these instructions pinned in place
+/// during liveness-aware CodeGen passes; however, one thing which does not
+/// respect this is the ScheduleDAG scheduler. This pass is therefore run
+/// immediately after that.
+///
+/// This is all hopefully a temporary measure until we find a better way of
+/// describing the live-in nature of arguments.
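+///
+/// An illustrative sketch (hand-written, not actual output): after ISel the
+/// entry block might contain
+///
+///   %0 = ARGUMENT_i32 0
+///   %1 = ARGUMENT_i32 1
+///   %2 = ADD_I32 %0, %1
+///
+/// and if scheduling were to sink an ARGUMENT below the ADD_I32, this pass
+/// would hoist it back above the first non-ARGUMENT instruction.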
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-argument-move" + +namespace { +class WebAssemblyArgumentMove final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyArgumentMove() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "WebAssembly Argument Move"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyArgumentMove::ID = 0; +INITIALIZE_PASS(WebAssemblyArgumentMove, DEBUG_TYPE, + "Move ARGUMENT instructions for WebAssembly", false, false) + +FunctionPass *llvm::createWebAssemblyArgumentMove() { + return new WebAssemblyArgumentMove(); +} + +bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Argument Move **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineBasicBlock &EntryMBB = MF.front(); + MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + + // Look for the first NonArg instruction. + for (MachineInstr &MI : EntryMBB) { + if (!WebAssembly::isArgument(MI.getOpcode())) { + InsertPt = MI; + break; + } + } + + // Now move any argument instructions later in the block + // to before our first NonArg instruction. + for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { + if (WebAssembly::isArgument(MI.getOpcode())) { + EntryMBB.insert(InsertPt, MI.removeFromParent()); + Changed = true; + } + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp new file mode 100644 index 000000000000..5d8b873ce23b --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -0,0 +1,421 @@ +//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a printer that converts from our internal +/// representation of machine-dependent LLVM code to the WebAssembly assembly +/// language. 
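+///
+/// In rough outline (a sketch of the flow, not an exhaustive list): each
+/// MachineInstr is lowered through WebAssemblyMCInstLower to an MCInst and
+/// handed to the MCStreamer, while module-level information such as function
+/// signatures, globals, and custom sections is emitted in EmitEndOfAsmFile.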
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyAsmPrinter.h" +#include "MCTargetDesc/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "MCTargetDesc/WebAssemblyTargetStreamer.h" +#include "TargetInfo/WebAssemblyTargetInfo.h" +#include "WebAssembly.h" +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyRegisterInfo.h" +#include "WebAssemblyTargetMachine.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +extern cl::opt<bool> WasmKeepRegisters; + +//===----------------------------------------------------------------------===// +// Helpers. +//===----------------------------------------------------------------------===// + +MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, + MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}) + if (TRI->isTypeLegalForClass(*TRC, T)) + return T; + LLVM_DEBUG(errs() << "Unknown type for register number: " << RegNo); + llvm_unreachable("Unknown register type"); + return MVT::Other; +} + +std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { + Register RegNo = MO.getReg(); + assert(Register::isVirtualRegister(RegNo) && + "Unlowered physical register encountered during assembly printing"); + assert(!MFI->isVRegStackified(RegNo)); + unsigned WAReg = MFI->getWAReg(RegNo); + assert(WAReg != WebAssemblyFunctionInfo::UnusedReg); + return '$' + utostr(WAReg); +} + +WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() { + MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); + return static_cast<WebAssemblyTargetStreamer *>(TS); +} + +//===----------------------------------------------------------------------===// +// WebAssemblyAsmPrinter Implementation. +//===----------------------------------------------------------------------===// + +void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { + for (auto &It : OutContext.getSymbols()) { + // Emit a .globaltype and .eventtype declaration. 
+    auto Sym = cast<MCSymbolWasm>(It.getValue());
+    if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL)
+      getTargetStreamer()->emitGlobalType(Sym);
+    else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_EVENT)
+      getTargetStreamer()->emitEventType(Sym);
+  }
+
+  for (const auto &F : M) {
+    // Emit function type info for all undefined functions
+    if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
+      SmallVector<MVT, 4> Results;
+      SmallVector<MVT, 4> Params;
+      computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+      auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+      Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+      if (!Sym->getSignature()) {
+        auto Signature = signatureFromMVTs(Results, Params);
+        Sym->setSignature(Signature.get());
+        addSignature(std::move(Signature));
+      }
+      // FIXME: this was originally intended for post-linking and was only used
+      // for imports that were only called indirectly (i.e. s2wasm could not
+      // infer the type from a call). With object files it applies to all
+      // imports, so fix the names and the tests, or rethink how import
+      // declarations work in asm files.
+      getTargetStreamer()->emitFunctionType(Sym);
+
+      if (TM.getTargetTriple().isOSBinFormatWasm() &&
+          F.hasFnAttribute("wasm-import-module")) {
+        StringRef Name =
+            F.getFnAttribute("wasm-import-module").getValueAsString();
+        Sym->setImportModule(Name);
+        getTargetStreamer()->emitImportModule(Sym, Name);
+      }
+      if (TM.getTargetTriple().isOSBinFormatWasm() &&
+          F.hasFnAttribute("wasm-import-name")) {
+        StringRef Name =
+            F.getFnAttribute("wasm-import-name").getValueAsString();
+        Sym->setImportName(Name);
+        getTargetStreamer()->emitImportName(Sym, Name);
+      }
+    }
+  }
+
+  for (const auto &G : M.globals()) {
+    if (!G.hasInitializer() && G.hasExternalLinkage()) {
+      if (G.getValueType()->isSized()) {
+        uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType());
+        OutStreamer->emitELFSize(getSymbol(&G),
+                                 MCConstantExpr::create(Size, OutContext));
+      }
+    }
+  }
+
+  if (const NamedMDNode *Named = M.getNamedMetadata("wasm.custom_sections")) {
+    for (const Metadata *MD : Named->operands()) {
+      const auto *Tuple = dyn_cast<MDTuple>(MD);
+      if (!Tuple || Tuple->getNumOperands() != 2)
+        continue;
+      const MDString *Name = dyn_cast<MDString>(Tuple->getOperand(0));
+      const MDString *Contents = dyn_cast<MDString>(Tuple->getOperand(1));
+      if (!Name || !Contents)
+        continue;
+
+      OutStreamer->PushSection();
+      std::string SectionName = (".custom_section."
+ Name->getString()).str(); + MCSectionWasm *MySection = + OutContext.getWasmSection(SectionName, SectionKind::getMetadata()); + OutStreamer->SwitchSection(MySection); + OutStreamer->EmitBytes(Contents->getString()); + OutStreamer->PopSection(); + } + } + + EmitProducerInfo(M); + EmitTargetFeatures(M); +} + +void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) { + llvm::SmallVector<std::pair<std::string, std::string>, 4> Languages; + if (const NamedMDNode *Debug = M.getNamedMetadata("llvm.dbg.cu")) { + llvm::SmallSet<StringRef, 4> SeenLanguages; + for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) { + const auto *CU = cast<DICompileUnit>(Debug->getOperand(I)); + StringRef Language = dwarf::LanguageString(CU->getSourceLanguage()); + Language.consume_front("DW_LANG_"); + if (SeenLanguages.insert(Language).second) + Languages.emplace_back(Language.str(), ""); + } + } + + llvm::SmallVector<std::pair<std::string, std::string>, 4> Tools; + if (const NamedMDNode *Ident = M.getNamedMetadata("llvm.ident")) { + llvm::SmallSet<StringRef, 4> SeenTools; + for (size_t I = 0, E = Ident->getNumOperands(); I < E; ++I) { + const auto *S = cast<MDString>(Ident->getOperand(I)->getOperand(0)); + std::pair<StringRef, StringRef> Field = S->getString().split("version"); + StringRef Name = Field.first.trim(); + StringRef Version = Field.second.trim(); + if (SeenTools.insert(Name).second) + Tools.emplace_back(Name.str(), Version.str()); + } + } + + int FieldCount = int(!Languages.empty()) + int(!Tools.empty()); + if (FieldCount != 0) { + MCSectionWasm *Producers = OutContext.getWasmSection( + ".custom_section.producers", SectionKind::getMetadata()); + OutStreamer->PushSection(); + OutStreamer->SwitchSection(Producers); + OutStreamer->EmitULEB128IntValue(FieldCount); + for (auto &Producers : {std::make_pair("language", &Languages), + std::make_pair("processed-by", &Tools)}) { + if (Producers.second->empty()) + continue; + OutStreamer->EmitULEB128IntValue(strlen(Producers.first)); + OutStreamer->EmitBytes(Producers.first); + OutStreamer->EmitULEB128IntValue(Producers.second->size()); + for (auto &Producer : *Producers.second) { + OutStreamer->EmitULEB128IntValue(Producer.first.size()); + OutStreamer->EmitBytes(Producer.first); + OutStreamer->EmitULEB128IntValue(Producer.second.size()); + OutStreamer->EmitBytes(Producer.second); + } + } + OutStreamer->PopSection(); + } +} + +void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) { + struct FeatureEntry { + uint8_t Prefix; + StringRef Name; + }; + + // Read target features and linkage policies from module metadata + SmallVector<FeatureEntry, 4> EmittedFeatures; + for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { + std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); + Metadata *Policy = M.getModuleFlag(MDKey); + if (Policy == nullptr) + continue; + + FeatureEntry Entry; + Entry.Prefix = 0; + Entry.Name = KV.Key; + + if (auto *MD = cast<ConstantAsMetadata>(Policy)) + if (auto *I = cast<ConstantInt>(MD->getValue())) + Entry.Prefix = I->getZExtValue(); + + // Silently ignore invalid metadata + if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED && + Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED && + Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED) + continue; + + EmittedFeatures.push_back(Entry); + } + + if (EmittedFeatures.size() == 0) + return; + + // Emit features and linkage policies into the "target_features" section + MCSectionWasm *FeaturesSection = OutContext.getWasmSection( + ".custom_section.target_features", 
SectionKind::getMetadata());
+  OutStreamer->PushSection();
+  OutStreamer->SwitchSection(FeaturesSection);
+
+  OutStreamer->EmitULEB128IntValue(EmittedFeatures.size());
+  for (auto &F : EmittedFeatures) {
+    OutStreamer->EmitIntValue(F.Prefix, 1);
+    OutStreamer->EmitULEB128IntValue(F.Name.size());
+    OutStreamer->EmitBytes(F.Name);
+  }
+
+  OutStreamer->PopSection();
+}
+
+void WebAssemblyAsmPrinter::EmitConstantPool() {
+  assert(MF->getConstantPool()->getConstants().empty() &&
+         "WebAssembly disables constant pools");
+}
+
+void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
+  // Nothing to do; jump tables are incorporated into the instruction stream.
+}
+
+void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
+  const Function &F = MF->getFunction();
+  SmallVector<MVT, 1> ResultVTs;
+  SmallVector<MVT, 4> ParamVTs;
+  computeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+  auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
+  auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
+  WasmSym->setSignature(Signature.get());
+  addSignature(std::move(Signature));
+  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+
+  // FIXME: clean up how params and results are emitted (use signatures)
+  getTargetStreamer()->emitFunctionType(WasmSym);
+
+  // Emit the function index.
+  if (MDNode *Idx = F.getMetadata("wasm.index")) {
+    assert(Idx->getNumOperands() == 1);
+
+    getTargetStreamer()->emitIndIdx(AsmPrinter::lowerConstant(
+        cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
+  }
+
+  SmallVector<wasm::ValType, 16> Locals;
+  valTypesFromMVTs(MFI->getLocals(), Locals);
+  getTargetStreamer()->emitLocal(Locals);
+
+  AsmPrinter::EmitFunctionBodyStart();
+}
+
+void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
+
+  switch (MI->getOpcode()) {
+  case WebAssembly::ARGUMENT_i32:
+  case WebAssembly::ARGUMENT_i32_S:
+  case WebAssembly::ARGUMENT_i64:
+  case WebAssembly::ARGUMENT_i64_S:
+  case WebAssembly::ARGUMENT_f32:
+  case WebAssembly::ARGUMENT_f32_S:
+  case WebAssembly::ARGUMENT_f64:
+  case WebAssembly::ARGUMENT_f64_S:
+  case WebAssembly::ARGUMENT_v16i8:
+  case WebAssembly::ARGUMENT_v16i8_S:
+  case WebAssembly::ARGUMENT_v8i16:
+  case WebAssembly::ARGUMENT_v8i16_S:
+  case WebAssembly::ARGUMENT_v4i32:
+  case WebAssembly::ARGUMENT_v4i32_S:
+  case WebAssembly::ARGUMENT_v2i64:
+  case WebAssembly::ARGUMENT_v2i64_S:
+  case WebAssembly::ARGUMENT_v4f32:
+  case WebAssembly::ARGUMENT_v4f32_S:
+  case WebAssembly::ARGUMENT_v2f64:
+  case WebAssembly::ARGUMENT_v2f64_S:
+    // These represent values which are live into the function entry, so
+    // there's no instruction to emit.
+    break;
+  case WebAssembly::FALLTHROUGH_RETURN: {
+    // These instructions represent the implicit return at the end of a
+    // function body.
+    if (isVerbose()) {
+      OutStreamer->AddComment("fallthrough-return");
+      OutStreamer->AddBlankLine();
+    }
+    break;
+  }
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
+  case WebAssembly::EXTRACT_EXCEPTION_I32:
+  case WebAssembly::EXTRACT_EXCEPTION_I32_S:
+    // These are pseudo instructions that simulate popping values from the
+    // stack. We print these only when we have -wasm-keep-registers on for
+    // assembly readability.
+    if (!WasmKeepRegisters)
+      break;
+    LLVM_FALLTHROUGH;
+  default: {
+    WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
+    MCInst TmpInst;
+    MCInstLowering.lower(MI, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
+    break;
+  }
+  }
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+                                            unsigned OpNo,
+                                            const char *ExtraCode,
+                                            raw_ostream &OS) {
+  // First try the generic code, which knows about modifiers like 'c' and 'n'.
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
+    return false;
+
+  if (!ExtraCode) {
+    const MachineOperand &MO = MI->getOperand(OpNo);
+    switch (MO.getType()) {
+    case MachineOperand::MO_Immediate:
+      OS << MO.getImm();
+      return false;
+    case MachineOperand::MO_Register:
+      // FIXME: INLINEASM is the only opcode that still contains registers, as
+      // required by MachineInstr::getDebugVariable().
+      assert(MI->getOpcode() == WebAssembly::INLINEASM);
+      OS << regToString(MO);
+      return false;
+    case MachineOperand::MO_GlobalAddress:
+      PrintSymbolOperand(MO, OS);
+      return false;
+    case MachineOperand::MO_ExternalSymbol:
+      GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
+      printOffset(MO.getOffset(), OS);
+      return false;
+    case MachineOperand::MO_MachineBasicBlock:
+      MO.getMBB()->getSymbol()->print(OS, MAI);
+      return false;
+    default:
+      break;
+    }
+  }
+
+  return true;
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                                  unsigned OpNo,
+                                                  const char *ExtraCode,
+                                                  raw_ostream &OS) {
+  // The current approach to inline asm is that "r" constraints are expressed
+  // as local indices, rather than values on the operand stack. This simplifies
+  // using "r" as it eliminates the need to push and pop the values in a
+  // particular order; however, it also makes it impossible to have an "m"
+  // constraint, so we don't support it.
+
+  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() {
+  RegisterAsmPrinter<WebAssemblyAsmPrinter> X(getTheWebAssemblyTarget32());
+  RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(getTheWebAssemblyTarget64());
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
new file mode 100644
index 000000000000..4e55c81dec38
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -0,0 +1,79 @@
+// WebAssemblyAsmPrinter.h - WebAssembly implementation of AsmPrinter -*- C++ -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCSymbol;
+class WebAssemblyTargetStreamer;
+class WebAssemblyMCInstLower;
+
+class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
+  const WebAssemblySubtarget *Subtarget;
+  const MachineRegisterInfo *MRI;
+  WebAssemblyFunctionInfo *MFI;
+  // TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
+  std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+
+public:
+  explicit WebAssemblyAsmPrinter(TargetMachine &TM,
+                                 std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), MRI(nullptr),
+        MFI(nullptr) {}
+
+  StringRef getPassName() const override {
+    return "WebAssembly Assembly Printer";
+  }
+
+  const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+  void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+    Signatures.push_back(std::move(Sig));
+  }
+
+  //===------------------------------------------------------------------===//
+  // MachineFunctionPass Implementation.
+  //===------------------------------------------------------------------===//
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+    MRI = &MF.getRegInfo();
+    MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+    return AsmPrinter::runOnMachineFunction(MF);
+  }
+
+  //===------------------------------------------------------------------===//
+  // AsmPrinter Implementation.
+  //===------------------------------------------------------------------===//
+
+  void EmitEndOfAsmFile(Module &M) override;
+  void EmitProducerInfo(Module &M);
+  void EmitTargetFeatures(Module &M);
+  void EmitJumpTableInfo() override;
+  void EmitConstantPool() override;
+  void EmitFunctionBodyStart() override;
+  void EmitInstruction(const MachineInstr *MI) override;
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &OS) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &OS) override;
+
+  MVT getRegType(unsigned RegNo) const;
+  std::string regToString(const MachineOperand &MO);
+  WebAssemblyTargetStreamer *getTargetStreamer();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
new file mode 100644
index 000000000000..c069af9eed62
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -0,0 +1,421 @@
+//===-- WebAssemblyCFGSort.cpp - CFG Sorting ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a CFG sorting pass.
+///
+/// This pass reorders the blocks in a function to put them into topological
+/// order, ignoring loop backedges, and without any loop or exception being
+/// interrupted by a block not dominated by its header, with special care
+/// to keep the order as similar as possible to the original order.
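+///
+/// A small illustrative example (hypothetical, not from the test suite):
+/// given the original order A B C D, where {C, D} form a loop headed by C and
+/// B is reachable only from D, the sort produces A C D B rather than keeping
+/// B between A and C, so the loop is not interrupted while the rest of the
+/// original order is preserved.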
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-cfg-sort"
+
+// Option to disable EH pad first sorting. Only for testing unwind destination
+// mismatches in CFGStackify.
+static cl::opt<bool> WasmDisableEHPadSort(
+    "wasm-disable-ehpad-sort", cl::ReallyHidden,
+    cl::desc(
+        "WebAssembly: Disable EH pad-first sort order. Testing purpose only."),
+    cl::init(false));
+
+namespace {
+
+// Wrapper for loops and exceptions
+class Region {
+public:
+  virtual ~Region() = default;
+  virtual MachineBasicBlock *getHeader() const = 0;
+  virtual bool contains(const MachineBasicBlock *MBB) const = 0;
+  virtual unsigned getNumBlocks() const = 0;
+  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+  virtual iterator_range<block_iterator> blocks() const = 0;
+  virtual bool isLoop() const = 0;
+};
+
+template <typename T> class ConcreteRegion : public Region {
+  const T *Region;
+
+public:
+  ConcreteRegion(const T *Region) : Region(Region) {}
+  MachineBasicBlock *getHeader() const override { return Region->getHeader(); }
+  bool contains(const MachineBasicBlock *MBB) const override {
+    return Region->contains(MBB);
+  }
+  unsigned getNumBlocks() const override { return Region->getNumBlocks(); }
+  iterator_range<block_iterator> blocks() const override {
+    return Region->blocks();
+  }
+  bool isLoop() const override { return false; }
+};
+
+template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
+
+// This class holds information about nested Regions; it is analogous to what
+// LoopInfo is for loops.
+class RegionInfo {
+  const MachineLoopInfo &MLI;
+  const WebAssemblyExceptionInfo &WEI;
+  std::vector<const Region *> Regions;
+  DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
+  DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
+
+public:
+  RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI)
+      : MLI(MLI), WEI(WEI) {}
+
+  // Returns the smallest loop or exception that contains MBB
+  const Region *getRegionFor(const MachineBasicBlock *MBB) {
+    const auto *ML = MLI.getLoopFor(MBB);
+    const auto *WE = WEI.getExceptionFor(MBB);
+    if (!ML && !WE)
+      return nullptr;
+    if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+      // If the smallest region containing MBB is a loop
+      if (LoopMap.count(ML))
+        return LoopMap[ML].get();
+      LoopMap[ML] = std::make_unique<ConcreteRegion<MachineLoop>>(ML);
+      return LoopMap[ML].get();
+    } else {
+      // If the smallest region containing MBB is an exception
+      if (ExceptionMap.count(WE))
+        return ExceptionMap[WE].get();
+      ExceptionMap[WE] =
+          std::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
+      return ExceptionMap[WE].get();
+    }
+  }
+};
+
+class WebAssemblyCFGSort final : public MachineFunctionPass {
+  StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<MachineLoopInfo>();
+    AU.addRequired<WebAssemblyExceptionInfo>();
+    AU.addPreserved<WebAssemblyExceptionInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyCFGSort() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGSort::ID = 0;
+INITIALIZE_PASS(WebAssemblyCFGSort, DEBUG_TYPE,
+                "Reorders blocks in topological order", false, false)
+
+FunctionPass *llvm::createWebAssemblyCFGSort() {
+  return new WebAssemblyCFGSort();
+}
+
+static void maybeUpdateTerminator(MachineBasicBlock *MBB) {
+#ifndef NDEBUG
+  bool AnyBarrier = false;
+#endif
+  bool AllAnalyzable = true;
+  for (const MachineInstr &Term : MBB->terminators()) {
+#ifndef NDEBUG
+    AnyBarrier |= Term.isBarrier();
+#endif
+    AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
+  }
+  assert((AnyBarrier || AllAnalyzable) &&
+         "AnalyzeBranch needs to analyze any block with a fallthrough");
+  if (AllAnalyzable)
+    MBB->updateTerminator();
+}
+
+namespace {
+// EH pads are selected first regardless of the block comparison order.
+// When only one of the BBs is an EH pad, we give a higher priority to it, to
+// prevent common mismatches between possibly throwing calls and the EH pads
+// they unwind to, as in the example below:
+//
+// bb0:
+//   call @foo      // If this throws, unwind to bb2
+// bb1:
+//   call @bar      // If this throws, unwind to bb3
+// bb2 (ehpad):
+//   handler_bb2
+// bb3 (ehpad):
+//   handler_bb3
+// continuing code
+//
+// Because this pass tries to preserve the original BB order, this order will
+// not change. But this will produce the following try-catch structure in
+// CFGStackify, resulting in a mismatch:
+// try
+//   try
+//     call @foo
+//     call @bar    // This should unwind to bb3, not bb2!
+//   catch
+//     handler_bb2
+//   end
+// catch
+//   handler_bb3
+// end
+// continuing code
+//
+// If instead we give a higher priority to an EH pad whenever it is ready, then
+// in this example, when both bb1 and bb2 are ready, we would pick bb2 first.
+
+/// Sort blocks by their number.
+struct CompareBlockNumbers {
+  bool operator()(const MachineBasicBlock *A,
+                  const MachineBasicBlock *B) const {
+    if (!WasmDisableEHPadSort) {
+      if (A->isEHPad() && !B->isEHPad())
+        return false;
+      if (!A->isEHPad() && B->isEHPad())
+        return true;
+    }
+
+    return A->getNumber() > B->getNumber();
+  }
+};
+/// Sort blocks by their number in the opposite order.
+struct CompareBlockNumbersBackwards {
+  bool operator()(const MachineBasicBlock *A,
+                  const MachineBasicBlock *B) const {
+    if (!WasmDisableEHPadSort) {
+      if (A->isEHPad() && !B->isEHPad())
+        return false;
+      if (!A->isEHPad() && B->isEHPad())
+        return true;
+    }
+
+    return A->getNumber() < B->getNumber();
+  }
+};
+/// Bookkeeping for a region to help ensure that we don't mix blocks not
+/// dominated by its header among its blocks.
+struct Entry {
+  const Region *TheRegion;
+  unsigned NumBlocksLeft;
+
+  /// List of blocks not dominated by the region's header that are deferred
+  /// until after all of the region's blocks have been seen.
+  std::vector<MachineBasicBlock *> Deferred;
+
+  explicit Entry(const class Region *R)
+      : TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {}
+};
+} // end anonymous namespace
+
+/// Sort the blocks, taking special care to make sure that regions are not
+/// interrupted by blocks not dominated by their header.
+/// TODO: There are many opportunities for improving the heuristics here.
+/// Explore them.
+static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+                       const WebAssemblyExceptionInfo &WEI,
+                       const MachineDominatorTree &MDT) {
+  // Prepare for a topological sort: Record the number of predecessors each
+  // block has, ignoring loop backedges.
+  MF.RenumberBlocks();
+  SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
+  for (MachineBasicBlock &MBB : MF) {
+    unsigned N = MBB.pred_size();
+    if (MachineLoop *L = MLI.getLoopFor(&MBB))
+      if (L->getHeader() == &MBB)
+        for (const MachineBasicBlock *Pred : MBB.predecessors())
+          if (L->contains(Pred))
+            --N;
+    NumPredsLeft[MBB.getNumber()] = N;
+  }
+
+  // Topological sort the CFG, with additional constraints:
+  //  - Between a region header and the last block in the region, there can be
+  //    no blocks not dominated by its header.
+  //  - It's desirable to preserve the original block order when possible.
+  // We use two ready lists; Preferred and Ready. Preferred has recently
+  // processed successors, to help preserve block sequences from the original
+  // order. Ready has the remaining ready blocks. EH blocks are picked first
+  // from both queues.
+  PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+                CompareBlockNumbers>
+      Preferred;
+  PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+                CompareBlockNumbersBackwards>
+      Ready;
+
+  RegionInfo RI(MLI, WEI);
+  SmallVector<Entry, 4> Entries;
+  for (MachineBasicBlock *MBB = &MF.front();;) {
+    const Region *R = RI.getRegionFor(MBB);
+    if (R) {
+      // If MBB is a region header, add it to the active region list. We can't
+      // put any blocks that it doesn't dominate until we see the end of the
+      // region.
+      if (R->getHeader() == MBB)
+        Entries.push_back(Entry(R));
+      // For each active region the block is in, decrement the count.
If MBB is
+      // the last block in an active region, take it off the list and pick up
+      // any blocks deferred because the header didn't dominate them.
+      for (Entry &E : Entries)
+        if (E.TheRegion->contains(MBB) && --E.NumBlocksLeft == 0)
+          for (auto DeferredBlock : E.Deferred)
+            Ready.push(DeferredBlock);
+      while (!Entries.empty() && Entries.back().NumBlocksLeft == 0)
+        Entries.pop_back();
+    }
+    // The main topological sort logic.
+    for (MachineBasicBlock *Succ : MBB->successors()) {
+      // Ignore backedges.
+      if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
+        if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
+          continue;
+      // Decrement the predecessor count. If it's now zero, it's ready.
+      if (--NumPredsLeft[Succ->getNumber()] == 0)
+        Preferred.push(Succ);
+    }
+    // Determine the block to follow MBB. First try to find a preferred block,
+    // to preserve the original block order when possible.
+    MachineBasicBlock *Next = nullptr;
+    while (!Preferred.empty()) {
+      Next = Preferred.top();
+      Preferred.pop();
+      // If Next isn't dominated by the top active region header, defer it
+      // until that region is done.
+      if (!Entries.empty() &&
+          !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+        Entries.back().Deferred.push_back(Next);
+        Next = nullptr;
+        continue;
+      }
+      // If Next was originally ordered before MBB, and that isn't just because
+      // it was loop-rotated above the header, it's not preferred.
+      if (Next->getNumber() < MBB->getNumber() &&
+          (WasmDisableEHPadSort || !Next->isEHPad()) &&
+          (!R || !R->contains(Next) ||
+           R->getHeader()->getNumber() < Next->getNumber())) {
+        Ready.push(Next);
+        Next = nullptr;
+        continue;
+      }
+      break;
+    }
+    // If we didn't find a suitable block in the Preferred list, check the
+    // general Ready list.
+    if (!Next) {
+      // If there are no more blocks to process, we're done.
+      if (Ready.empty()) {
+        maybeUpdateTerminator(MBB);
+        break;
+      }
+      for (;;) {
+        Next = Ready.top();
+        Ready.pop();
+        // If Next isn't dominated by the top active region header, defer it
+        // until that region is done.
+        if (!Entries.empty() &&
+            !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+          Entries.back().Deferred.push_back(Next);
+          continue;
+        }
+        break;
+      }
+    }
+    // Move the next block into place and iterate.
+    Next->moveAfter(MBB);
+    maybeUpdateTerminator(MBB);
+    MBB = Next;
+  }
+  assert(Entries.empty() && "Active sort region list not finished");
+  MF.RenumberBlocks();
+
+#ifndef NDEBUG
+  SmallSetVector<const Region *, 8> OnStack;
+
+  // Insert a sentinel representing the degenerate loop that starts at the
+  // function entry block and includes the entire function as a "loop" that
+  // executes once.
+  OnStack.insert(nullptr);
+
+  for (auto &MBB : MF) {
+    assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+    const Region *Region = RI.getRegionFor(&MBB);
+
+    if (Region && &MBB == Region->getHeader()) {
+      if (Region->isLoop()) {
+        // Loop header. The loop predecessor should be sorted above, and the
+        // other predecessors should be backedges below.
+        for (auto Pred : MBB.predecessors())
+          assert(
+              (Pred->getNumber() < MBB.getNumber() || Region->contains(Pred)) &&
+              "Loop header predecessors must be loop predecessors or "
+              "backedges");
+      } else {
+        // Not a loop header. All predecessors should be sorted above.
+        for (auto Pred : MBB.predecessors())
+          assert(Pred->getNumber() < MBB.getNumber() &&
+                 "Non-loop-header predecessors should be topologically sorted");
+      }
+      assert(OnStack.insert(Region) &&
+             "Regions should be declared at most once.");
+
+    } else {
+      // Not a loop header. All predecessors should be sorted above.
+      for (auto Pred : MBB.predecessors())
+        assert(Pred->getNumber() < MBB.getNumber() &&
+               "Non-loop-header predecessors should be topologically sorted");
+      assert(OnStack.count(RI.getRegionFor(&MBB)) &&
+             "Blocks must be nested in their regions");
+    }
+    while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
+      OnStack.pop_back();
+  }
+  assert(OnStack.pop_back_val() == nullptr &&
+         "The function entry block shouldn't actually be a region header");
+  assert(OnStack.empty() &&
+         "Control flow stack pushes and pops should be balanced.");
+#endif
+}
+
+bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "********** CFG Sorting **********\n"
+                       "********** Function: "
+                    << MF.getName() << '\n');
+
+  const auto &MLI = getAnalysis<MachineLoopInfo>();
+  const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
+  // Liveness is not tracked for VALUE_STACK physreg.
+  MF.getRegInfo().invalidateLiveness();
+
+  // Sort the blocks, with contiguous sort regions.
+  sortBlocks(MF, MLI, WEI, MDT);
+
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
new file mode 100644
index 000000000000..7e867edaaa27
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -0,0 +1,1389 @@
+//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a CFG stacking pass.
+///
+/// This pass inserts BLOCK, LOOP, and TRY markers to mark the start of scopes,
+/// since scope boundaries serve as the labels for WebAssembly's control
+/// transfers.
+///
+/// This is sufficient to convert arbitrary CFGs into a form that works on
+/// WebAssembly, provided that all loops are single-entry.
+///
+/// When exceptions are used, this pass also fixes mismatches in unwind
+/// destinations created while transforming the CFG into wasm's structured
+/// format.
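+///
+/// A minimal hand-written illustration (not actual pass output): a diamond
+/// CFG
+///
+///   bb0 -> bb1, bb2;  bb1 -> bb3;  bb2 -> bb3
+///
+/// can be stackified as
+///
+///   block
+///     block
+///       br_if 0     ;; branch to bb2
+///       (bb1 code)
+///       br 1        ;; branch to bb3
+///     end_block     ;; label for bb2
+///     (bb2 code)
+///   end_block       ;; label for bb3
+///   (bb3 code)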
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyExceptionInfo.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/MC/MCAsmInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-cfg-stackify" + +STATISTIC(NumUnwindMismatches, "Number of EH pad unwind mismatches found"); + +namespace { +class WebAssemblyCFGStackify final : public MachineFunctionPass { + StringRef getPassName() const override { return "WebAssembly CFG Stackify"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<WebAssemblyExceptionInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + // For each block whose label represents the end of a scope, record the block + // which holds the beginning of the scope. This will allow us to quickly skip + // over scoped regions when walking blocks. + SmallVector<MachineBasicBlock *, 8> ScopeTops; + + // Placing markers. + void placeMarkers(MachineFunction &MF); + void placeBlockMarker(MachineBasicBlock &MBB); + void placeLoopMarker(MachineBasicBlock &MBB); + void placeTryMarker(MachineBasicBlock &MBB); + void removeUnnecessaryInstrs(MachineFunction &MF); + bool fixUnwindMismatches(MachineFunction &MF); + void rewriteDepthImmediates(MachineFunction &MF); + void fixEndsAtEndOfFunction(MachineFunction &MF); + + // For each BLOCK|LOOP|TRY, the corresponding END_(BLOCK|LOOP|TRY). + DenseMap<const MachineInstr *, MachineInstr *> BeginToEnd; + // For each END_(BLOCK|LOOP|TRY), the corresponding BLOCK|LOOP|TRY. + DenseMap<const MachineInstr *, MachineInstr *> EndToBegin; + // <TRY marker, EH pad> map + DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad; + // <EH pad, TRY marker> map + DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry; + + // There can be an appendix block at the end of each function, shared for: + // - creating a correct signature for fallthrough returns + // - target for rethrows that need to unwind to the caller, but are trapped + // inside another try/catch + MachineBasicBlock *AppendixBB = nullptr; + MachineBasicBlock *getAppendixBlock(MachineFunction &MF) { + if (!AppendixBB) { + AppendixBB = MF.CreateMachineBasicBlock(); + // Give it a fake predecessor so that AsmPrinter prints its label. + AppendixBB->addSuccessor(AppendixBB); + MF.push_back(AppendixBB); + } + return AppendixBB; + } + + // Helper functions to register / unregister scope information created by + // marker instructions. 
+  void registerScope(MachineInstr *Begin, MachineInstr *End);
+  void registerTryScope(MachineInstr *Begin, MachineInstr *End,
+                        MachineBasicBlock *EHPad);
+  void unregisterScope(MachineInstr *Begin);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+  ~WebAssemblyCFGStackify() override { releaseMemory(); }
+  void releaseMemory() override;
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGStackify::ID = 0;
+INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
+                "Insert BLOCK/LOOP/TRY markers for WebAssembly scopes", false,
+                false)
+
+FunctionPass *llvm::createWebAssemblyCFGStackify() {
+  return new WebAssemblyCFGStackify();
+}
+
+/// Test whether Pred has any terminators explicitly branching to MBB, as
+/// opposed to falling through. Note that it's possible (e.g. in unoptimized
+/// code) for a branch instruction to both branch to a block and fall through
+/// to it, so we check the actual branch operands to see if there are any
+/// explicit mentions.
+static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
+                                 MachineBasicBlock *MBB) {
+  for (MachineInstr &MI : Pred->terminators())
+    for (MachineOperand &MO : MI.explicit_operands())
+      if (MO.isMBB() && MO.getMBB() == MBB)
+        return true;
+  return false;
+}
+
+// Returns an iterator to the earliest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet
+// contains ones that should go after the marker. In this function, AfterSet
+// is only used for sanity checking.
+static MachineBasicBlock::iterator
+getEarliestInsertPos(MachineBasicBlock *MBB,
+                     const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+                     const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+  auto InsertPos = MBB->end();
+  while (InsertPos != MBB->begin()) {
+    if (BeforeSet.count(&*std::prev(InsertPos))) {
+#ifndef NDEBUG
+      // Sanity check
+      for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
+        assert(!AfterSet.count(&*std::prev(Pos)));
+#endif
+      break;
+    }
+    --InsertPos;
+  }
+  return InsertPos;
+}
+
+// Returns an iterator to the latest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet
+// contains ones that should go after the marker. In this function, BeforeSet
+// is only used for sanity checking.
+static MachineBasicBlock::iterator
+getLatestInsertPos(MachineBasicBlock *MBB,
+                   const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+                   const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+  auto InsertPos = MBB->begin();
+  while (InsertPos != MBB->end()) {
+    if (AfterSet.count(&*InsertPos)) {
+#ifndef NDEBUG
+      // Sanity check
+      for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
+        assert(!BeforeSet.count(&*Pos));
+#endif
+      break;
+    }
+    ++InsertPos;
+  }
+  return InsertPos;
+}
+
+void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
+                                           MachineInstr *End) {
+  BeginToEnd[Begin] = End;
+  EndToBegin[End] = Begin;
+}
+
+void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
+                                              MachineInstr *End,
+                                              MachineBasicBlock *EHPad) {
+  registerScope(Begin, End);
+  TryToEHPad[Begin] = EHPad;
+  EHPadToTry[EHPad] = Begin;
+}
+
+void WebAssemblyCFGStackify::unregisterScope(MachineInstr *Begin) {
+  assert(BeginToEnd.count(Begin));
+  MachineInstr *End = BeginToEnd[Begin];
+  assert(EndToBegin.count(End));
+  BeginToEnd.erase(Begin);
+  EndToBegin.erase(End);
+  MachineBasicBlock *EHPad = TryToEHPad.lookup(Begin);
+  if (EHPad) {
+    assert(EHPadToTry.count(EHPad));
+    TryToEHPad.erase(Begin);
+    EHPadToTry.erase(EHPad);
+  }
+}
+
+/// Insert a BLOCK marker for branches to MBB (if needed).
+// TODO Consider a more generalized way of handling block (and also loop and
+// try) signatures when we implement the multi-value proposal later.
+void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
+  assert(!MBB.isEHPad());
+  MachineFunction &MF = *MBB.getParent();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  // First compute the nearest common dominator of all forward non-fallthrough
+  // predecessors so that we minimize the time that the BLOCK is on the stack,
+  // which reduces overall stack height.
+  MachineBasicBlock *Header = nullptr;
+  bool IsBranchedTo = false;
+  bool IsBrOnExn = false;
+  MachineInstr *BrOnExn = nullptr;
+  int MBBNumber = MBB.getNumber();
+  for (MachineBasicBlock *Pred : MBB.predecessors()) {
+    if (Pred->getNumber() < MBBNumber) {
+      Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+      if (explicitlyBranchesTo(Pred, &MBB)) {
+        IsBranchedTo = true;
+        if (Pred->getFirstTerminator()->getOpcode() == WebAssembly::BR_ON_EXN) {
+          IsBrOnExn = true;
+          assert(!BrOnExn && "There should be only one br_on_exn per block");
+          BrOnExn = &*Pred->getFirstTerminator();
+        }
+      }
+    }
+  }
+  if (!Header)
+    return;
+  if (!IsBranchedTo)
+    return;
+
+  assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
+  MachineBasicBlock *LayoutPred = MBB.getPrevNode();
+
+  // If the nearest common dominator is inside a more deeply nested context,
+  // walk out to the nearest scope which isn't more deeply nested.
+  for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+    if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+      if (ScopeTop->getNumber() > Header->getNumber()) {
+        // Skip over an intervening scope.
+        I = std::next(ScopeTop->getIterator());
+      } else {
+        // We found a scope level at an appropriate depth.
+        Header = ScopeTop;
+        break;
+      }
+    }
+  }
+
+  // Decide where in Header to put the BLOCK.
+
+  // Instructions that should go before the BLOCK.
+  SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+  // Instructions that should go after the BLOCK.
+  SmallPtrSet<const MachineInstr *, 4> AfterSet;
+  for (const auto &MI : *Header) {
+    // If there is a previously placed LOOP marker and the bottom block of the
+    // loop is above MBB, it should be after the BLOCK, because the loop is
+    // nested in this BLOCK. Otherwise it should be before the BLOCK.
+    if (MI.getOpcode() == WebAssembly::LOOP) {
+      auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+      if (MBB.getNumber() > LoopBottom->getNumber())
+        AfterSet.insert(&MI);
+#ifndef NDEBUG
+      else
+        BeforeSet.insert(&MI);
+#endif
+    }
+
+    // All previously inserted BLOCK/TRY markers should be after the BLOCK
+    // because they are all nested blocks.
+    if (MI.getOpcode() == WebAssembly::BLOCK ||
+        MI.getOpcode() == WebAssembly::TRY)
+      AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+    // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK.
+    if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+        MI.getOpcode() == WebAssembly::END_LOOP ||
+        MI.getOpcode() == WebAssembly::END_TRY)
+      BeforeSet.insert(&MI);
+#endif
+
+    // Terminators should go after the BLOCK.
+    if (MI.isTerminator())
+      AfterSet.insert(&MI);
+  }
+
+  // Local expression tree should go after the BLOCK.
+  for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+       --I) {
+    if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+      continue;
+    if (WebAssembly::isChild(*std::prev(I), MFI))
+      AfterSet.insert(&*std::prev(I));
+    else
+      break;
+  }
+
+  // Add the BLOCK.
+
+  // 'br_on_exn' extracts an exnref object and pushes a variable number of
+  // values depending on its tag. For a C++ exception, it's a single i32
+  // value, and the generated code will be in the form of:
+  // block i32
+  //   br_on_exn 0, $__cpp_exception
+  //   rethrow
+  // end_block
+  WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void;
+  if (IsBrOnExn) {
+    const char *TagName = BrOnExn->getOperand(1).getSymbolName();
+    if (std::strcmp(TagName, "__cpp_exception") != 0)
+      llvm_unreachable("Only C++ exception is supported");
+    ReturnType = WebAssembly::BlockType::I32;
+  }
+
+  auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
+  MachineInstr *Begin =
+      BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
+              TII.get(WebAssembly::BLOCK))
+          .addImm(int64_t(ReturnType));
+
+  // Decide where in MBB to put the END_BLOCK.
+  BeforeSet.clear();
+  AfterSet.clear();
+  for (auto &MI : MBB) {
+#ifndef NDEBUG
+    // END_BLOCK should precede existing LOOP and TRY markers.
+    if (MI.getOpcode() == WebAssembly::LOOP ||
+        MI.getOpcode() == WebAssembly::TRY)
+      AfterSet.insert(&MI);
+#endif
+
+    // If there is a previously placed END_LOOP marker and the header of the
+    // loop is above this block's header, the END_LOOP should be placed after
+    // the BLOCK, because the loop contains this block. Otherwise the END_LOOP
+    // should be placed before the BLOCK. The same for END_TRY.
+    if (MI.getOpcode() == WebAssembly::END_LOOP ||
+        MI.getOpcode() == WebAssembly::END_TRY) {
+      if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+        BeforeSet.insert(&MI);
+#ifndef NDEBUG
+      else
+        AfterSet.insert(&MI);
+#endif
+    }
+  }
+
+  // Mark the end of the block.
+  InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+  MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
+                              TII.get(WebAssembly::END_BLOCK));
+  registerScope(Begin, End);
+
+  // Track the farthest-spanning scope that ends at this point.
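+  // For example (an illustrative case): if a BLOCK starting at bb1 and
+  // another BLOCK starting at bb3 both end at this BB, ScopeTops for this BB
+  // records bb1, the farther-spanning of the two headers.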
+  int Number = MBB.getNumber();
+  if (!ScopeTops[Number] ||
+      ScopeTops[Number]->getNumber() > Header->getNumber())
+    ScopeTops[Number] = Header;
+}
+
+/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
+void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
+  MachineFunction &MF = *MBB.getParent();
+  const auto &MLI = getAnalysis<MachineLoopInfo>();
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  MachineLoop *Loop = MLI.getLoopFor(&MBB);
+  if (!Loop || Loop->getHeader() != &MBB)
+    return;
+
+  // The operand of a LOOP is the first block after the loop. If the loop is at
+  // the bottom of the function, insert a dummy block at the end.
+  MachineBasicBlock *Bottom = WebAssembly::getBottom(Loop);
+  auto Iter = std::next(Bottom->getIterator());
+  if (Iter == MF.end()) {
+    getAppendixBlock(MF);
+    Iter = std::next(Bottom->getIterator());
+  }
+  MachineBasicBlock *AfterLoop = &*Iter;
+
+  // Decide where in MBB to put the LOOP.
+  SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+  SmallPtrSet<const MachineInstr *, 4> AfterSet;
+  for (const auto &MI : MBB) {
+    // LOOP marker should be after any existing loop that ends here. Otherwise
+    // we assume the instruction belongs to the loop.
+    if (MI.getOpcode() == WebAssembly::END_LOOP)
+      BeforeSet.insert(&MI);
+#ifndef NDEBUG
+    else
+      AfterSet.insert(&MI);
+#endif
+  }
+
+  // Mark the beginning of the loop.
+  auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+  MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
+                                TII.get(WebAssembly::LOOP))
+                            .addImm(int64_t(WebAssembly::BlockType::Void));
+
+  // Decide where in AfterLoop to put the END_LOOP.
+  BeforeSet.clear();
+  AfterSet.clear();
+#ifndef NDEBUG
+  for (const auto &MI : MBB)
+    // Existing END_LOOP markers belong to parent loops of this loop
+    if (MI.getOpcode() == WebAssembly::END_LOOP)
+      AfterSet.insert(&MI);
+#endif
+
+  // Mark the end of the loop (using the debug location of an arbitrary branch
+  // to the loop end as its location).
+  InsertPos = getEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
+  DebugLoc EndDL = AfterLoop->pred_empty()
+                       ? DebugLoc()
+                       : (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
+  MachineInstr *End =
+      BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
+  registerScope(Begin, End);
+
+  assert((!ScopeTops[AfterLoop->getNumber()] ||
+          ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
+         "With block sorting the outermost loop for a block should be first.");
+  if (!ScopeTops[AfterLoop->getNumber()])
+    ScopeTops[AfterLoop->getNumber()] = &MBB;
+}
+
+void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
+  assert(MBB.isEHPad());
+  MachineFunction &MF = *MBB.getParent();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+  const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  // Compute the nearest common dominator of all unwind predecessors.
+  MachineBasicBlock *Header = nullptr;
+  int MBBNumber = MBB.getNumber();
+  for (auto *Pred : MBB.predecessors()) {
+    if (Pred->getNumber() < MBBNumber) {
+      Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+      assert(!explicitlyBranchesTo(Pred, &MBB) &&
+             "Explicit branch to an EH pad!");
+    }
+  }
+  if (!Header)
+    return;
+
+  // If this try is at the bottom of the function, insert a dummy block at the
+  // end.
+  WebAssemblyException *WE = WEI.getExceptionFor(&MBB);
+  assert(WE);
+  MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
+
+  auto Iter = std::next(Bottom->getIterator());
+  if (Iter == MF.end()) {
+    getAppendixBlock(MF);
+    Iter = std::next(Bottom->getIterator());
+  }
+  MachineBasicBlock *Cont = &*Iter;
+
+  assert(Cont != &MF.front());
+  MachineBasicBlock *LayoutPred = Cont->getPrevNode();
+
+  // If the nearest common dominator is inside a more deeply nested context,
+  // walk out to the nearest scope which isn't more deeply nested.
+  for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+    if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+      if (ScopeTop->getNumber() > Header->getNumber()) {
+        // Skip over an intervening scope.
+        I = std::next(ScopeTop->getIterator());
+      } else {
+        // We found a scope level at an appropriate depth.
+        Header = ScopeTop;
+        break;
+      }
+    }
+  }
+
+  // Decide where in Header to put the TRY.
+
+  // Instructions that should go before the TRY.
+  SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+  // Instructions that should go after the TRY.
+  SmallPtrSet<const MachineInstr *, 4> AfterSet;
+  for (const auto &MI : *Header) {
+    // If there is a previously placed LOOP marker and the bottom block of the
+    // loop is above MBB, it should be after the TRY, because the loop is
+    // nested in this TRY. Otherwise it should be before the TRY.
+    if (MI.getOpcode() == WebAssembly::LOOP) {
+      auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+      if (MBB.getNumber() > LoopBottom->getNumber())
+        AfterSet.insert(&MI);
+#ifndef NDEBUG
+      else
+        BeforeSet.insert(&MI);
+#endif
+    }
+
+    // All previously inserted BLOCK/TRY markers should be after the TRY
+    // because they are all nested try scopes.
+    if (MI.getOpcode() == WebAssembly::BLOCK ||
+        MI.getOpcode() == WebAssembly::TRY)
+      AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+    // All END_(BLOCK/LOOP/TRY) markers should be before the TRY.
+    if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+        MI.getOpcode() == WebAssembly::END_LOOP ||
+        MI.getOpcode() == WebAssembly::END_TRY)
+      BeforeSet.insert(&MI);
+#endif
+
+    // Terminators should go after the TRY.
+    if (MI.isTerminator())
+      AfterSet.insert(&MI);
+  }
+
+  // If Header unwinds to MBB (= Header contains 'invoke'), the try block
+  // should contain the call within it. So the call should go after the TRY.
+  // The exception is when the header's terminator is a rethrow instruction,
+  // in which case that instruction, not a call instruction before it, is
+  // going to throw.
+  MachineInstr *ThrowingCall = nullptr;
+  if (MBB.isPredecessor(Header)) {
+    auto TermPos = Header->getFirstTerminator();
+    if (TermPos == Header->end() ||
+        TermPos->getOpcode() != WebAssembly::RETHROW) {
+      for (auto &MI : reverse(*Header)) {
+        if (MI.isCall()) {
+          AfterSet.insert(&MI);
+          ThrowingCall = &MI;
+          // Possibly throwing calls are usually wrapped by EH_LABEL
+          // instructions. We don't want to split the call from its EH_LABEL.
+          if (MI.getIterator() != Header->begin() &&
+              std::prev(MI.getIterator())->isEHLabel()) {
+            AfterSet.insert(&*std::prev(MI.getIterator()));
+            ThrowingCall = &*std::prev(MI.getIterator());
+          }
+          break;
+        }
+      }
+    }
+  }
+
+  // Local expression tree should go after the TRY.
+  // For BLOCK placement, we start the search from the previous instruction of
+  // a BB's terminator, but in TRY's case, we should start from the previous
+  // instruction of a call that can throw, or an EH_LABEL that precedes the
+  // call, because the return values of the call's previous instructions can
+  // be stackified and consumed by the throwing call.
+  auto SearchStartPt = ThrowingCall ? MachineBasicBlock::iterator(ThrowingCall)
+                                    : Header->getFirstTerminator();
+  for (auto I = SearchStartPt, E = Header->begin(); I != E; --I) {
+    if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+      continue;
+    if (WebAssembly::isChild(*std::prev(I), MFI))
+      AfterSet.insert(&*std::prev(I));
+    else
+      break;
+  }
+
+  // Add the TRY.
+  auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
+  MachineInstr *Begin =
+      BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
+              TII.get(WebAssembly::TRY))
+          .addImm(int64_t(WebAssembly::BlockType::Void));
+
+  // Decide where in Cont to put the END_TRY.
+  BeforeSet.clear();
+  AfterSet.clear();
+  for (const auto &MI : *Cont) {
+#ifndef NDEBUG
+    // END_TRY should precede existing LOOP and BLOCK markers.
+    if (MI.getOpcode() == WebAssembly::LOOP ||
+        MI.getOpcode() == WebAssembly::BLOCK)
+      AfterSet.insert(&MI);
+
+    // All END_TRY markers placed earlier belong to exceptions that contain
+    // this one.
+    if (MI.getOpcode() == WebAssembly::END_TRY)
+      AfterSet.insert(&MI);
+#endif
+
+    // If there is a previously placed END_LOOP marker and its header is after
+    // where the TRY marker is, this loop is contained within the 'catch' part,
+    // so the END_TRY marker should go after that. Otherwise, the whole
+    // try-catch is contained within this loop, so the END_TRY should go before
+    // that.
+    if (MI.getOpcode() == WebAssembly::END_LOOP) {
+      // For a LOOP to be after TRY, LOOP's BB should be after TRY's BB; if
+      // they are in the same BB, LOOP is always before TRY.
+      if (EndToBegin[&MI]->getParent()->getNumber() > Header->getNumber())
+        BeforeSet.insert(&MI);
+#ifndef NDEBUG
+      else
+        AfterSet.insert(&MI);
+#endif
+    }
+
+    // It is not possible for an END_BLOCK to be already in this block.
+  }
+
+  // Mark the end of the TRY.
+  InsertPos = getEarliestInsertPos(Cont, BeforeSet, AfterSet);
+  MachineInstr *End =
+      BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(),
+              TII.get(WebAssembly::END_TRY));
+  registerTryScope(Begin, End, &MBB);
+
+  // Track the farthest-spanning scope that ends at this point. We create two
+  // mappings: (BB with 'end_try' -> BB with 'try') and (BB with 'catch' -> BB
+  // with 'try'). We need to create 'catch' -> 'try' mapping here too because
+  // markers should not span across 'catch'. For example, this should not
+  // happen:
+  //
+  // try
+  // block     --|  (X)
+  // catch       |
+  // end_block --|
+  // end_try
+  for (int Number : {Cont->getNumber(), MBB.getNumber()}) {
+    if (!ScopeTops[Number] ||
+        ScopeTops[Number]->getNumber() > Header->getNumber())
+      ScopeTops[Number] = Header;
+  }
+}
+
+void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  // When there is an unconditional branch right before a catch instruction
+  // and it branches to the end_try marker, we don't need the branch, because
+  // if there is no exception, the control flow transfers to that point anyway.
+  // bb0:
+  //   try
+  //     ...
+  //     br bb2      <- Not necessary
+  // bb1:
+  //   catch
+  //     ...
+  // bb2:
+  //   end
+  for (auto &MBB : MF) {
+    if (!MBB.isEHPad())
+      continue;
+
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();
+    MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent();
+    bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+    if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) ||
+                       (!Cond.empty() && FBB && FBB == Cont)))
+      TII.removeBranch(*EHPadLayoutPred);
+  }
+
+  // When there are block / end_block markers that overlap with try / end_try
+  // markers, and the block and try markers' return types are the same, the
+  // block / end_block markers are not necessary, because try / end_try
+  // markers can also serve as boundaries for branches.
+  // block         <- Not necessary
+  // try
+  //   ...
+  // catch
+  //   ...
+  // end
+  // end           <- Not necessary
+  SmallVector<MachineInstr *, 32> ToDelete;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.getOpcode() != WebAssembly::TRY)
+        continue;
+
+      MachineInstr *Try = &MI, *EndTry = BeginToEnd[Try];
+      MachineBasicBlock *TryBB = Try->getParent();
+      MachineBasicBlock *Cont = EndTry->getParent();
+      int64_t RetType = Try->getOperand(0).getImm();
+      for (auto B = Try->getIterator(), E = std::next(EndTry->getIterator());
+           B != TryBB->begin() && E != Cont->end() &&
+           std::prev(B)->getOpcode() == WebAssembly::BLOCK &&
+           E->getOpcode() == WebAssembly::END_BLOCK &&
+           std::prev(B)->getOperand(0).getImm() == RetType;
+           --B, ++E) {
+        ToDelete.push_back(&*std::prev(B));
+        ToDelete.push_back(&*E);
+      }
+    }
+  }
+  for (auto *MI : ToDelete) {
+    if (MI->getOpcode() == WebAssembly::BLOCK)
+      unregisterScope(MI);
+    MI->eraseFromParent();
+  }
+}
+
+// When MBB is split into MBB and Split, we should unstackify defs in MBB that
+// have their uses in Split.
+static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
+                                         MachineBasicBlock &Split,
+                                         WebAssemblyFunctionInfo &MFI,
+                                         MachineRegisterInfo &MRI) {
+  for (auto &MI : Split) {
+    for (auto &MO : MI.explicit_uses()) {
+      if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg()))
+        continue;
+      if (MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg()))
+        if (Def->getParent() == &MBB)
+          MFI.unstackifyVReg(MO.getReg());
+    }
+  }
+}
+
+bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Linearizing the control flow by placing TRY / END_TRY markers can create
+  // mismatches in unwind destinations. There are two kinds of mismatches we
+  // try to solve here.
+
+  // 1. When an instruction may throw, but the EH pad it will unwind to can be
+  // different from that in the original CFG.
+  //
+  // Example: we have the following CFG:
+  // bb0:
+  //   call @foo (if it throws, unwind to bb2)
+  // bb1:
+  //   call @bar (if it throws, unwind to bb3)
+  // bb2 (ehpad):
+  //   catch
+  //   ...
+  // bb3 (ehpad):
+  //   catch
+  //   handler body
+  //
+  // And the CFG is sorted in this order. Then after placing TRY markers, it
+  // will look like: (BB markers are omitted)
+  // try $label1
+  //   try
+  //     call @foo
+  //     call @bar (if it throws, unwind to bb3)
+  //   catch         <- ehpad (bb2)
+  //     ...
+  //   end_try
+  // catch           <- ehpad (bb3)
+  //   handler body
+  // end_try
+  //
+  // Now if bar() throws, it is going to end up in bb2, not bb3, where it
+  // is supposed to end up. We solve this problem by
+  // a. Split the target unwind EH pad (here bb3) so that the handler body is
+  //    right after 'end_try', which means we extract the handler body out of
+  //    the catch block. We do this because this handler body should be
+  //    somewhere branchable from the inner scope.
+  // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+  //    here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branch to the handler body.
+  // c. Place a branch after the newly inserted nested end_try so it can
+  //    bypass the handler body, which is now outside of a catch block.
+  //
+  // The result will look like the following. (new: a) means this instruction
+  // is newly created in the process of doing 'a' above.
+  //
+  // block $label0                 (new: placeBlockMarker)
+  // try $label1
+  //   try
+  //     call @foo
+  //     try                       (new: b)
+  //       call @bar
+  //     catch                     (new: b)
+  //       local.set n / drop      (new: b)
+  //       br $label1              (new: b)
+  //     end_try                   (new: b)
+  //   catch                       <- ehpad (bb2)
+  //   end_try
+  //   br $label0                  (new: c)
+  // catch                         <- ehpad (bb3)
+  // end_try                       (hoisted: a)
+  // handler body
+  // end_block                     (new: placeBlockMarker)
+  //
+  // Note that the new wrapping block/end_block will be generated later in
+  // placeBlockMarker.
+  //
+  // TODO Currently local.sets and local.gets are generated to move the exnref
+  // value created by catches. That's because we don't support yielding values
+  // from a block in LLVM machine IR yet, even though it is supported by wasm.
+  // Delete unnecessary local.get/local.sets once yielding values from a block
+  // is supported. The full EH spec requires multi-value support to do this,
+  // but for C++ we don't yet need it because we only throw a single i32.
+  //
+  // ---
+  // 2. The same as 1, but in this case an instruction unwinds to a caller
+  // function and not another EH pad.
+  //
+  // Example: we have the following CFG:
+  // bb0:
+  //   call @foo (if it throws, unwind to bb2)
+  // bb1:
+  //   call @bar (if it throws, unwind to caller)
+  // bb2 (ehpad):
+  //   catch
+  //   ...
+  //
+  // And the CFG is sorted in this order. Then after placing TRY markers, it
+  // will look like:
+  // try
+  //   call @foo
+  //   call @bar (if it throws, unwind to caller)
+  // catch         <- ehpad (bb2)
+  //   ...
+  // end_try
+  //
+  // Now if bar() throws, it is going to end up in bb2, when it is supposed to
+  // throw up to the caller.
+  // We solve this problem by
+  // a. Create a new 'appendix' BB at the end of the function and put a single
+  //    'rethrow' instruction (+ local.get) in there.
+  // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+  //    here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branch to the new appendix block.
+  //
+  // block $label0          (new: placeBlockMarker)
+  // try
+  //   call @foo
+  //   try                  (new: b)
+  //     call @bar
+  //   catch                (new: b)
+  //     local.set n        (new: b)
+  //     br $label0         (new: b)
+  //   end_try              (new: b)
+  // catch                  <- ehpad (bb2)
+  //   ...
+  // end_try
+  // ...
+  // end_block              (new: placeBlockMarker)
+  // local.get n            (new: a)  <- appendix block
+  // rethrow                (new: a)
+  //
+  // In case there are multiple calls in a BB that may throw to the caller,
+  // they can be wrapped together in one nested try scope. (In 1, this
+  // couldn't happen, because the may-throwing instruction there had an unwind
+  // destination, i.e., it was an invoke before, and there could be only one
+  // invoke within a BB.)
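+  //
+  // A sketch of that grouping (illustrative; @baz and @qux are hypothetical
+  // callees that both unwind to the caller):
+  // try                  (new: b)
+  //   call @baz
+  //   call @qux
+  // catch                (new: b)
+  //   local.set n        (new: b)
+  //   br $label0         (new: b)
+  // end_try              (new: b)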
+
+  SmallVector<const MachineBasicBlock *, 8> EHPadStack;
+  // Range of instructions to be wrapped in a new nested try/catch
+  using TryRange = std::pair<MachineInstr *, MachineInstr *>;
+  // In original CFG, <unwind destination BB, a vector of try ranges>
+  DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
+  // In new CFG, <destination to branch to, a vector of try ranges>
+  DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
+  // In new CFG, <destination to branch to, register containing exnref>
+  DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
+
+  // Gather possibly throwing calls (i.e., previously invokes) whose current
+  // unwind destination is not the same as in the original CFG.
+  for (auto &MBB : reverse(MF)) {
+    bool SeenThrowableInstInBB = false;
+    for (auto &MI : reverse(MBB)) {
+      if (MI.getOpcode() == WebAssembly::TRY)
+        EHPadStack.pop_back();
+      else if (MI.getOpcode() == WebAssembly::CATCH)
+        EHPadStack.push_back(MI.getParent());
+
+      // In this loop we only gather calls that have an EH pad to unwind to.
+      // So there will be at most 1 such call (= invoke) in a BB, so after
+      // we've seen one, we can skip the rest of the BB. Also if MBB has no EH
+      // pad successor or MI does not throw, this is not an invoke.
+      if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() ||
+          !WebAssembly::mayThrow(MI))
+        continue;
+      SeenThrowableInstInBB = true;
+
+      // If the EH pad on the stack top is where this instruction should
+      // unwind next, we're good.
+      MachineBasicBlock *UnwindDest = nullptr;
+      for (auto *Succ : MBB.successors()) {
+        if (Succ->isEHPad()) {
+          UnwindDest = Succ;
+          break;
+        }
+      }
+      if (EHPadStack.back() == UnwindDest)
+        continue;
+
+      // If not, record the range.
+      UnwindDestToTryRanges[UnwindDest].push_back(TryRange(&MI, &MI));
+    }
+  }
+
+  assert(EHPadStack.empty());
+
+  // Gather possibly throwing calls that are supposed to unwind up to the
+  // caller if they throw, but currently unwind to an incorrect destination.
+  // Unlike the loop above, there can be multiple calls within a BB that
+  // unwind to the caller, which we should group together in a range.
+  bool NeedAppendixBlock = false;
+  for (auto &MBB : reverse(MF)) {
+    MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive
+    for (auto &MI : reverse(MBB)) {
+      if (MI.getOpcode() == WebAssembly::TRY)
+        EHPadStack.pop_back();
+      else if (MI.getOpcode() == WebAssembly::CATCH)
+        EHPadStack.push_back(MI.getParent());
+
+      // If MBB has an EH pad successor, this inst does not unwind to the
+      // caller.
+      if (MBB.hasEHPadSuccessor())
+        continue;
+
+      // We wrap up the current range when we see a marker even if we haven't
+      // finished a BB.
+      if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) {
+        NeedAppendixBlock = true;
+        // Record the range. nullptr here means the unwind destination is the
+        // caller.
+        UnwindDestToTryRanges[nullptr].push_back(
+            TryRange(RangeBegin, RangeEnd));
+        RangeBegin = RangeEnd = nullptr; // Reset range pointers
+      }
+
+      // If EHPadStack is empty, that means it correctly unwinds to the caller
+      // if it throws, so we're good. If MI does not throw, we're good too.
+      if (EHPadStack.empty() || !WebAssembly::mayThrow(MI))
+        continue;
+
+      // We found an instruction that unwinds to the caller but currently has
+      // an incorrect unwind destination. Create a new range or extend the
+      // currently existing range.
+      if (!RangeEnd)
+        RangeBegin = RangeEnd = &MI;
+      else
+        RangeBegin = &MI;
+    }
+
+    if (RangeEnd) {
+      NeedAppendixBlock = true;
+      // Record the range. nullptr here means the unwind destination is the
+      // caller.
+      UnwindDestToTryRanges[nullptr].push_back(TryRange(RangeBegin, RangeEnd));
+      RangeBegin = RangeEnd = nullptr; // Reset range pointers
+    }
+  }
+
+  assert(EHPadStack.empty());
+  // We don't have any unwind destination mismatches to resolve.
+  if (UnwindDestToTryRanges.empty())
+    return false;
+
+  // If we found instructions that should unwind to the caller but currently
+  // have an incorrect unwind destination, we create an appendix block at the
+  // end of the function with a local.get and a rethrow instruction.
+  if (NeedAppendixBlock) {
+    auto *AppendixBB = getAppendixBlock(MF);
+    Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+    BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
+        .addReg(ExnReg);
+    // These instruction ranges should branch to this appendix BB.
+    for (auto Range : UnwindDestToTryRanges[nullptr])
+      BrDestToTryRanges[AppendixBB].push_back(Range);
+    BrDestToExnReg[AppendixBB] = ExnReg;
+  }
+
+  // We loop through unwind destination EH pads that are targeted from some
+  // inner scopes. Because these EH pads are destinations of more than one
+  // scope now, we split them so that the handler body is after 'end_try'.
+  // - Before
+  // ehpad:
+  //   catch
+  //   local.set n / drop
+  //   handler body
+  // ...
+  // cont:
+  //   end_try
+  //
+  // - After
+  // ehpad:
+  //   catch
+  //   local.set n / drop
+  // brdest:               (new)
+  //   end_try (hoisted from 'cont' BB)
+  //   handler body (taken from 'ehpad')
+  // ...
+  // cont:
+  for (auto &P : UnwindDestToTryRanges) {
+    NumUnwindMismatches += P.second.size();
+
+    // This means the destination is the appendix BB, which was separately
+    // handled above.
+    if (!P.first)
+      continue;
+
+    MachineBasicBlock *EHPad = P.first;
+
+    // Find the 'catch' instruction and the 'local.set' or 'drop' that follows
+    // it. If -wasm-disable-explicit-locals is not set, 'catch' should always
+    // be followed by either a 'local.set' or a 'drop', because 'br_on_exn' is
+    // generated after 'catch' in LateEHPrepare and we don't support blocks
+    // taking values yet.
+    MachineInstr *Catch = nullptr;
+    unsigned ExnReg = 0;
+    for (auto &MI : *EHPad) {
+      switch (MI.getOpcode()) {
+      case WebAssembly::CATCH:
+        Catch = &MI;
+        ExnReg = Catch->getOperand(0).getReg();
+        break;
+      }
+    }
+    assert(Catch && "EH pad does not have a catch");
+    assert(ExnReg != 0 && "Invalid register");
+
+    auto SplitPos = std::next(Catch->getIterator());
+
+    // Create a new BB that is going to be the destination for branches from
+    // the inner mismatched scope.
+    MachineInstr *BeginTry = EHPadToTry[EHPad];
+    MachineInstr *EndTry = BeginToEnd[BeginTry];
+    MachineBasicBlock *Cont = EndTry->getParent();
+    auto *BrDest = MF.CreateMachineBasicBlock();
+    MF.insert(std::next(EHPad->getIterator()), BrDest);
+    // Hoist up the existing 'end_try'.
+    BrDest->insert(BrDest->end(), EndTry->removeFromParent());
+    // Move the handler body from the EH pad to the new branch destination BB.
+    BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
+    unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI);
+    // Fix predecessor-successor relationship.
+    BrDest->transferSuccessors(EHPad);
+    EHPad->addSuccessor(BrDest);
+
+    // All try ranges that were supposed to unwind to this EH pad now have to
+    // branch to this new branch dest BB.
+    for (auto Range : UnwindDestToTryRanges[EHPad])
+      BrDestToTryRanges[BrDest].push_back(Range);
+    BrDestToExnReg[BrDest] = ExnReg;
+
+    // In case we fall through to the continuation BB after the catch block,
+    // we now have to add a branch to it.
+    // - Before
+    // try
+    //   ...
+    //   (falls through to 'cont')
+    // catch
+    //   handler body
+    // end
+    //               <-- cont
+    //
+    // - After
+    // try
+    //   ...
+    //   br %cont (new)
+    // catch
+    // end
+    // handler body
+    //               <-- cont
+    MachineBasicBlock *EHPadLayoutPred = &*std::prev(EHPad->getIterator());
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+    if (Analyzable && !TBB && !FBB) {
+      DebugLoc DL = EHPadLayoutPred->empty()
+                        ? DebugLoc()
+                        : EHPadLayoutPred->rbegin()->getDebugLoc();
+      BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont);
+    }
+  }
+
+  // For possibly throwing calls whose unwind destinations are currently
+  // incorrect because of CFG linearization, we wrap them with a nested
+  // try/catch/end_try, and within the new catch block, we branch to the
+  // correct handler.
+  // - Before
+  // mbb:
+  //   call @foo       <- Unwind destination mismatch!
+  // ehpad:
+  //   ...
+  //
+  // - After
+  // mbb:
+  //   try (new)
+  //   call @foo
+  // nested-ehpad:        (new)
+  //   catch              (new)
+  //   local.set n / drop (new)
+  //   br %brdest         (new)
+  // nested-end:          (new)
+  //   end_try            (new)
+  // ehpad:
+  //   ...
+  for (auto &P : BrDestToTryRanges) {
+    MachineBasicBlock *BrDest = P.first;
+    auto &TryRanges = P.second;
+    unsigned ExnReg = BrDestToExnReg[BrDest];
+
+    for (auto Range : TryRanges) {
+      MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr;
+      std::tie(RangeBegin, RangeEnd) = Range;
+      auto *MBB = RangeBegin->getParent();
+
+      // Include possible EH_LABELs in the range
+      if (RangeBegin->getIterator() != MBB->begin() &&
+          std::prev(RangeBegin->getIterator())->isEHLabel())
+        RangeBegin = &*std::prev(RangeBegin->getIterator());
+      if (std::next(RangeEnd->getIterator()) != MBB->end() &&
+          std::next(RangeEnd->getIterator())->isEHLabel())
+        RangeEnd = &*std::next(RangeEnd->getIterator());
+
+      MachineBasicBlock *EHPad = nullptr;
+      for (auto *Succ : MBB->successors()) {
+        if (Succ->isEHPad()) {
+          EHPad = Succ;
+          break;
+        }
+      }
+
+      // Create the nested try instruction.
+      MachineInstr *NestedTry =
+          BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(),
+                  TII.get(WebAssembly::TRY))
+              .addImm(int64_t(WebAssembly::BlockType::Void));
+
+      // Create the nested EH pad and fill instructions in.
+      MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock();
+      MF.insert(std::next(MBB->getIterator()), NestedEHPad);
+      NestedEHPad->setIsEHPad();
+      NestedEHPad->setIsEHScopeEntry();
+      BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::CATCH),
+              ExnReg);
+      BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::BR))
+          .addMBB(BrDest);
+
+      // Create the nested continuation BB and end_try instruction.
+      MachineBasicBlock *NestedCont = MF.CreateMachineBasicBlock();
+      MF.insert(std::next(NestedEHPad->getIterator()), NestedCont);
+      MachineInstr *NestedEndTry =
+          BuildMI(*NestedCont, NestedCont->begin(), RangeEnd->getDebugLoc(),
+                  TII.get(WebAssembly::END_TRY));
+      // In case MBB has more instructions after the try range, move them to
+      // the new nested continuation BB.
+      NestedCont->splice(NestedCont->end(), MBB,
+                         std::next(RangeEnd->getIterator()), MBB->end());
+      unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI);
+      registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
+
+      // Fix predecessor-successor relationship.
+      NestedCont->transferSuccessors(MBB);
+      if (EHPad)
+        NestedCont->removeSuccessor(EHPad);
+      MBB->addSuccessor(NestedEHPad);
+      MBB->addSuccessor(NestedCont);
+      NestedEHPad->addSuccessor(BrDest);
+    }
+  }
+
+  // Renumber BBs and recalculate ScopeTop info because new BBs might have
+  // been created and inserted above.
+  MF.RenumberBlocks();
+  ScopeTops.clear();
+  ScopeTops.resize(MF.getNumBlockIDs());
+  for (auto &MBB : reverse(MF)) {
+    for (auto &MI : reverse(MBB)) {
+      if (ScopeTops[MBB.getNumber()])
+        break;
+      switch (MI.getOpcode()) {
+      case WebAssembly::END_BLOCK:
+      case WebAssembly::END_LOOP:
+      case WebAssembly::END_TRY:
+        ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent();
+        break;
+      case WebAssembly::CATCH:
+        ScopeTops[MBB.getNumber()] = EHPadToTry[&MBB]->getParent();
+        break;
+      }
+    }
+  }
+
+  // Recompute the dominator tree.
+  getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
+
+  // Place block markers for newly added branches.
+  SmallVector<MachineBasicBlock *, 8> BrDests;
+  for (auto &P : BrDestToTryRanges)
+    BrDests.push_back(P.first);
+  llvm::sort(BrDests,
+             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+               auto ANum = A->getNumber();
+               auto BNum = B->getNumber();
+               return ANum < BNum;
+             });
+  for (auto *Dest : BrDests)
+    placeBlockMarker(*Dest);
+
+  return true;
+}
+
+static unsigned
+getDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+         const MachineBasicBlock *MBB) {
+  unsigned Depth = 0;
+  for (auto X : reverse(Stack)) {
+    if (X == MBB)
+      break;
+    ++Depth;
+  }
+  assert(Depth < Stack.size() && "Branch destination should be in scope");
+  return Depth;
+}
+
+/// In normal assembly languages, when the end of a function is unreachable,
+/// because the function ends in an infinite loop or a noreturn call or
+/// similar, it isn't necessary to worry about the function return type at the
+/// end of the function, because it's never reached. However, in WebAssembly,
+/// blocks that end at the function end need to have a return type signature
+/// that matches the function signature, even though it's unreachable. This
+/// function checks for such cases and fixes up the signatures.
+void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
+  const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  if (MFI.getResults().empty())
+    return;
+
+  // MCInstLower will add the proper types to multivalue signatures based on
+  // the function return type.
+  WebAssembly::BlockType RetType =
+      MFI.getResults().size() > 1
+          ? WebAssembly::BlockType::Multivalue
+          : WebAssembly::BlockType(
+                WebAssembly::toValType(MFI.getResults().front()));
+
+  for (MachineBasicBlock &MBB : reverse(MF)) {
+    for (MachineInstr &MI : reverse(MBB)) {
+      if (MI.isPosition() || MI.isDebugInstr())
+        continue;
+      switch (MI.getOpcode()) {
+      case WebAssembly::END_BLOCK:
+      case WebAssembly::END_LOOP:
+      case WebAssembly::END_TRY:
+        EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
+        continue;
+      default:
+        // Something other than an `end`. We're done.
+        return;
+      }
+    }
+  }
+}
+
+// WebAssembly functions end with an end instruction, as if the function body
+// were a block.
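+// For example, the printed assembly for a function ends like this (an
+// illustrative sketch):
+//   foo:
+//     ...
+//     end_function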
+static void appendEndToFunction(MachineFunction &MF,
+                                const WebAssemblyInstrInfo &TII) {
+  BuildMI(MF.back(), MF.back().end(),
+          MF.back().findPrevDebugLoc(MF.back().end()),
+          TII.get(WebAssembly::END_FUNCTION));
+}
+
+/// Insert LOOP/TRY/BLOCK markers at appropriate places.
+void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
+  // We allocate one more than the number of blocks in the function to
+  // accommodate the possible fake block we may insert at the end.
+  ScopeTops.resize(MF.getNumBlockIDs() + 1);
+  // Place the LOOP for MBB if MBB is the header of a loop.
+  for (auto &MBB : MF)
+    placeLoopMarker(MBB);
+
+  const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+  for (auto &MBB : MF) {
+    if (MBB.isEHPad()) {
+      // Place the TRY for MBB if MBB is the EH pad of an exception.
+      if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+          MF.getFunction().hasPersonalityFn())
+        placeTryMarker(MBB);
+    } else {
+      // Place the BLOCK for MBB if MBB is branched to from above.
+      placeBlockMarker(MBB);
+    }
+  }
+  // Fix mismatches in unwind destinations induced by linearizing the code.
+  if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+      MF.getFunction().hasPersonalityFn())
+    fixUnwindMismatches(MF);
+}
+
+void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
+  // Now rewrite references to basic blocks to be depth immediates.
+  SmallVector<const MachineBasicBlock *, 8> Stack;
+  for (auto &MBB : reverse(MF)) {
+    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      case WebAssembly::BLOCK:
+      case WebAssembly::TRY:
+        assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+                   MBB.getNumber() &&
+               "Block/try marker should be balanced");
+        Stack.pop_back();
+        break;
+
+      case WebAssembly::LOOP:
+        assert(Stack.back() == &MBB && "Loop top should be balanced");
+        Stack.pop_back();
+        break;
+
+      case WebAssembly::END_BLOCK:
+      case WebAssembly::END_TRY:
+        Stack.push_back(&MBB);
+        break;
+
+      case WebAssembly::END_LOOP:
+        Stack.push_back(EndToBegin[&MI]->getParent());
+        break;
+
+      default:
+        if (MI.isTerminator()) {
+          // Rewrite MBB operands to be depth immediates.
+          SmallVector<MachineOperand, 4> Ops(MI.operands());
+          while (MI.getNumOperands() > 0)
+            MI.RemoveOperand(MI.getNumOperands() - 1);
+          for (auto MO : Ops) {
+            if (MO.isMBB())
+              MO = MachineOperand::CreateImm(getDepth(Stack, MO.getMBB()));
+            MI.addOperand(MF, MO);
+          }
+        }
+        break;
+      }
+    }
+  }
+  assert(Stack.empty() && "Control flow should be balanced");
+}
+
+void WebAssemblyCFGStackify::releaseMemory() {
+  ScopeTops.clear();
+  BeginToEnd.clear();
+  EndToBegin.clear();
+  TryToEHPad.clear();
+  EHPadToTry.clear();
+  AppendixBB = nullptr;
+}
+
+bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "********** CFG Stackifying **********\n"
+                       "********** Function: "
+                    << MF.getName() << '\n');
+  const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+
+  releaseMemory();
+
+  // Liveness is not tracked for VALUE_STACK physreg.
+  MF.getRegInfo().invalidateLiveness();
+
+  // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
+  placeMarkers(MF);
+
+  // Remove unnecessary instructions possibly introduced by try/end_try
+  // markers.
+  if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+      MF.getFunction().hasPersonalityFn())
+    removeUnnecessaryInstrs(MF);
+
+  // Convert MBB operands in terminators to relative depth immediates.
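+  // For example (illustrative): a 'br' whose target block encloses two more
+  // deeply nested scopes at the branch point becomes 'br 2', since branch
+  // targets are encoded as the number of scopes to jump out of, counting from
+  // the innermost scope.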
+  rewriteDepthImmediates(MF);
+
+  // Fix up block/loop/try signatures at the end of the function to conform to
+  // WebAssembly's rules.
+  fixEndsAtEndOfFunction(MF);
+
+  // Add an end instruction at the end of the function body.
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  if (!MF.getSubtarget<WebAssemblySubtarget>()
+           .getTargetTriple()
+           .isOSBinFormatELF())
+    appendEndToFunction(MF, TII);
+
+  MF.getInfo<WebAssemblyFunctionInfo>()->setCFGStackified();
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
new file mode 100644
index 000000000000..2537e6042b1e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -0,0 +1,150 @@
+//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file converts pseudo call_indirect instructions into real
+/// call_indirects.
+///
+/// The order of arguments for a call_indirect is the arguments to the function
+/// call, followed by the function pointer. There's no natural way to express
+/// a machineinstr with varargs followed by one more arg, so we express it as
+/// the function pointer followed by varargs, then rewrite it here.
+///
+/// We need to rewrite the order of the arguments on the machineinstrs
+/// themselves so that register stackification knows the order they'll be
+/// executed in.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-call-indirect-fixup"
+
+namespace {
+class WebAssemblyCallIndirectFixup final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly CallIndirect Fixup";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyCallIndirectFixup() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCallIndirectFixup::ID = 0;
+INITIALIZE_PASS(WebAssemblyCallIndirectFixup, DEBUG_TYPE,
+                "Rewrite call_indirect argument orderings", false, false)
+
+FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
+  return new WebAssemblyCallIndirectFixup();
+}
+
+static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+    using namespace WebAssembly;
+  case PCALL_INDIRECT_VOID:
+    return CALL_INDIRECT_VOID;
+  case PCALL_INDIRECT_i32:
+    return CALL_INDIRECT_i32;
+  case PCALL_INDIRECT_i64:
+    return CALL_INDIRECT_i64;
+  case PCALL_INDIRECT_f32:
+    return CALL_INDIRECT_f32;
+  case PCALL_INDIRECT_f64:
+    return CALL_INDIRECT_f64;
+  case PCALL_INDIRECT_v16i8:
+    return CALL_INDIRECT_v16i8;
+  case PCALL_INDIRECT_v8i16:
+    return CALL_INDIRECT_v8i16;
+  case PCALL_INDIRECT_v4i32:
+    return CALL_INDIRECT_v4i32;
+  case PCALL_INDIRECT_v2i64:
+    return CALL_INDIRECT_v2i64;
+  case PCALL_INDIRECT_v4f32:
+    return CALL_INDIRECT_v4f32;
+  case PCALL_INDIRECT_v2f64:
+    return CALL_INDIRECT_v2f64;
+  case PCALL_INDIRECT_exnref:
+    return CALL_INDIRECT_exnref;
+  case PRET_CALL_INDIRECT:
+    return RET_CALL_INDIRECT;
+  default:
+    return INSTRUCTION_LIST_END;
+  }
+}
+
+static bool isPseudoCallIndirect(const MachineInstr &MI) {
+  return getNonPseudoCallIndirectOpcode(MI) !=
+         WebAssembly::INSTRUCTION_LIST_END;
+}
+
+bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
+                    << "********** Function: " << MF.getName() << '\n');
+
+  bool Changed = false;
+  const WebAssemblyInstrInfo *TII =
+      MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (isPseudoCallIndirect(MI)) {
+        LLVM_DEBUG(dbgs() << "Found call_indirect: " << MI << '\n');
+
+        // Rewrite pseudo to non-pseudo
+        const MCInstrDesc &Desc = TII->get(getNonPseudoCallIndirectOpcode(MI));
+        MI.setDesc(Desc);
+
+        // Rewrite argument order
+        SmallVector<MachineOperand, 8> Ops;
+
+        // Set up a placeholder for the type signature immediate.
+        Ops.push_back(MachineOperand::CreateImm(0));
+
+        // Set up the flags immediate, which currently has no defined flags
+        // so it's always zero.
+        Ops.push_back(MachineOperand::CreateImm(0));
+
+        for (const MachineOperand &MO :
+             make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1,
+                        MI.operands_begin() + MI.getNumExplicitOperands()))
+          Ops.push_back(MO);
+        Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
+
+        // Replace the instruction's operands.
+        while (MI.getNumOperands() > MI.getDesc().getNumDefs())
+          MI.RemoveOperand(MI.getNumOperands() - 1);
+        for (const MachineOperand &MO : Ops)
+          MI.addOperand(MO);
+
+        LLVM_DEBUG(dbgs() << " After transform: " << MI);
+        Changed = true;
+      }
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "\nDone fixing up CALL_INDIRECTs\n\n");
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
new file mode 100644
index 000000000000..579377c9a5d7
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -0,0 +1,45 @@
+//===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the manager for MachineInstr DebugValues.
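+///
+/// When a defining instruction is moved or cloned (e.g. during register
+/// stackification), the DBG_VALUE instructions that refer to its result
+/// must be moved, cloned, or retargeted along with it; this class collects
+/// those DBG_VALUEs so they can be handled together.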
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyDebugValueManager.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+
+WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
+    MachineInstr *Instr) {
+  Instr->collectDebugValues(DbgValues);
+}
+
+void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
+  MachineBasicBlock *MBB = Insert->getParent();
+  for (MachineInstr *DBI : reverse(DbgValues))
+    MBB->splice(Insert, DBI->getParent(), DBI);
+}
+
+void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
+  for (auto *DBI : DbgValues)
+    DBI->getOperand(0).setReg(Reg);
+}
+
+void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
+                                         unsigned NewReg) {
+  MachineBasicBlock *MBB = Insert->getParent();
+  MachineFunction *MF = MBB->getParent();
+  for (MachineInstr *DBI : reverse(DbgValues)) {
+    MachineInstr *Clone = MF->CloneMachineInstr(DBI);
+    Clone->getOperand(0).setReg(NewReg);
+    MBB->insert(Insert, Clone);
+  }
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
new file mode 100644
index 000000000000..06e8805b5ad0
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -0,0 +1,37 @@
+// WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific
+/// manager for DebugValues associated with a specific MachineInstr.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class WebAssemblyDebugValueManager {
+  SmallVector<MachineInstr *, 2> DbgValues;
+
+public:
+  WebAssemblyDebugValueManager(MachineInstr *Instr);
+
+  void move(MachineInstr *Insert);
+  void updateReg(unsigned Reg);
+  void clone(MachineInstr *Insert, unsigned NewReg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
new file mode 100644
index 000000000000..0387957b14c2
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -0,0 +1,185 @@
+//===--- WebAssemblyExceptionInfo.cpp - Exception Information -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WebAssemblyException information analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyExceptionInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-exception-info"
+
+char WebAssemblyExceptionInfo::ID = 0;
+
+INITIALIZE_PASS_BEGIN(WebAssemblyExceptionInfo, DEBUG_TYPE,
+                      "WebAssembly Exception Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(WebAssemblyExceptionInfo, DEBUG_TYPE,
+                    "WebAssembly Exception Information", true, true)
+
+bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "********** Exception Info Calculation **********\n"
+                       "********** Function: "
+                    << MF.getName() << '\n');
+  releaseMemory();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
+  auto &MDF = getAnalysis<MachineDominanceFrontier>();
+  recalculate(MDT, MDF);
+  return false;
+}
+
+void WebAssemblyExceptionInfo::recalculate(
+    MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF) {
+  // Postorder traversal of the dominator tree.
+  SmallVector<WebAssemblyException *, 8> Exceptions;
+  for (auto DomNode : post_order(&MDT)) {
+    MachineBasicBlock *EHPad = DomNode->getBlock();
+    if (!EHPad->isEHPad())
+      continue;
+    auto *WE = new WebAssemblyException(EHPad);
+    discoverAndMapException(WE, MDT, MDF);
+    Exceptions.push_back(WE);
+  }
+
+  // Add BBs to exceptions
+  for (auto DomNode : post_order(&MDT)) {
+    MachineBasicBlock *MBB = DomNode->getBlock();
+    WebAssemblyException *WE = getExceptionFor(MBB);
+    for (; WE; WE = WE->getParentException())
+      WE->addBlock(MBB);
+  }
+
+  // Add subexceptions to exceptions
+  for (auto *WE : Exceptions) {
+    if (WE->getParentException())
+      WE->getParentException()->getSubExceptions().push_back(WE);
+    else
+      addTopLevelException(WE);
+  }
+
+  // For convenience, Blocks and SubExceptions are inserted in postorder.
+  // Reverse the lists.
+  for (auto *WE : Exceptions) {
+    WE->reverseBlock();
+    std::reverse(WE->getSubExceptions().begin(), WE->getSubExceptions().end());
+  }
+}
+
+void WebAssemblyExceptionInfo::releaseMemory() {
+  BBMap.clear();
+  DeleteContainerPointers(TopLevelExceptions);
+  TopLevelExceptions.clear();
+}
+
+void WebAssemblyExceptionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachineDominanceFrontier>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void WebAssemblyExceptionInfo::discoverAndMapException(
+    WebAssemblyException *WE, const MachineDominatorTree &MDT,
+    const MachineDominanceFrontier &MDF) {
+  unsigned NumBlocks = 0;
+  unsigned NumSubExceptions = 0;
+
+  // Map blocks that belong to a catchpad / cleanuppad
+  MachineBasicBlock *EHPad = WE->getEHPad();
+  SmallVector<MachineBasicBlock *, 8> WL;
+  WL.push_back(EHPad);
+  while (!WL.empty()) {
+    MachineBasicBlock *MBB = WL.pop_back_val();
+
+    // Find its outermost discovered exception. If this is a discovered block,
+    // check if it is already discovered to be a subexception of this
+    // exception.
+    WebAssemblyException *SubE = getOutermostException(MBB);
+    if (SubE) {
+      if (SubE != WE) {
+        // Discover a subexception of this exception.
+        SubE->setParentException(WE);
+        ++NumSubExceptions;
+        NumBlocks += SubE->getBlocksVector().capacity();
+        // All blocks that belong to this subexception have been already
+        // discovered. Skip all of them. Add the subexception's landing pad's
+        // dominance frontier to the worklist.
+        for (auto &Frontier : MDF.find(SubE->getEHPad())->second)
+          if (MDT.dominates(EHPad, Frontier))
+            WL.push_back(Frontier);
+      }
+      continue;
+    }
+
+    // This is an undiscovered block. Map it to the current exception.
+    changeExceptionFor(MBB, WE);
+    ++NumBlocks;
+
+    // Add successors dominated by the current BB to the worklist.
+    for (auto *Succ : MBB->successors())
+      if (MDT.dominates(EHPad, Succ))
+        WL.push_back(Succ);
+  }
+
+  WE->getSubExceptions().reserve(NumSubExceptions);
+  WE->reserveBlocks(NumBlocks);
+}
+
+WebAssemblyException *
+WebAssemblyExceptionInfo::getOutermostException(MachineBasicBlock *MBB) const {
+  WebAssemblyException *WE = getExceptionFor(MBB);
+  if (WE) {
+    while (WebAssemblyException *Parent = WE->getParentException())
+      WE = Parent;
+  }
+  return WE;
+}
+
+void WebAssemblyException::print(raw_ostream &OS, unsigned Depth) const {
+  OS.indent(Depth * 2) << "Exception at depth " << getExceptionDepth()
+                       << " containing: ";
+
+  for (unsigned I = 0; I < getBlocks().size(); ++I) {
+    MachineBasicBlock *MBB = getBlocks()[I];
+    if (I)
+      OS << ", ";
+    OS << "%bb." << MBB->getNumber();
+    if (const auto *BB = MBB->getBasicBlock())
+      if (BB->hasName())
+        OS << "." << BB->getName();
+
+    if (getEHPad() == MBB)
+      OS << " (landing-pad)";
+  }
+  OS << "\n";
+
+  for (auto &SubE : SubExceptions)
+    SubE->print(OS, Depth + 2);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void WebAssemblyException::dump() const { print(dbgs()); }
+#endif
+
+raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE) {
+  WE.print(OS);
+  return OS;
+}
+
+void WebAssemblyExceptionInfo::print(raw_ostream &OS, const Module *) const {
+  for (auto *WE : TopLevelExceptions)
+    WE->print(OS);
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
new file mode 100644
index 000000000000..9a90d7df7d47
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -0,0 +1,169 @@
+//===-- WebAssemblyExceptionInfo.h - WebAssembly Exception Info -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WebAssemblyException information analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYEXCEPTIONINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYEXCEPTIONINFO_H
+
+#include "WebAssembly.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class MachineDominatorTree;
+class MachineDominanceFrontier;
+
+// WebAssembly instructions for exception handling are structured as follows:
+// try
+//   instructions*
+// catch             ----|
+//   instructions*       | -> A WebAssemblyException consists of this region
+// end               ----|
+//
+// A WebAssemblyException object contains BBs that belong to a 'catch' part of
+// the try-catch-end structure to be created later.
+// 'try' and 'end' markers are not present at this stage and will be generated
+// in the CFGStackify pass. Because CFGSort requires all the BBs within a catch
+// part to be sorted together, as it does for loops, this pass calculates the
+// nesting structure of the catch parts of exceptions in a function.
+//
+// An exception catch part is defined as a BB with a catch instruction and all
+// other BBs dominated by this BB.
+class WebAssemblyException {
+  MachineBasicBlock *EHPad = nullptr;
+
+  WebAssemblyException *ParentException = nullptr;
+  std::vector<WebAssemblyException *> SubExceptions;
+  std::vector<MachineBasicBlock *> Blocks;
+  SmallPtrSet<const MachineBasicBlock *, 8> BlockSet;
+
+public:
+  WebAssemblyException(MachineBasicBlock *EHPad) : EHPad(EHPad) {}
+  ~WebAssemblyException() { DeleteContainerPointers(SubExceptions); }
+  WebAssemblyException(const WebAssemblyException &) = delete;
+  const WebAssemblyException &operator=(const WebAssemblyException &) = delete;
+
+  MachineBasicBlock *getEHPad() const { return EHPad; }
+  MachineBasicBlock *getHeader() const { return EHPad; }
+  WebAssemblyException *getParentException() const { return ParentException; }
+  void setParentException(WebAssemblyException *WE) { ParentException = WE; }
+
+  bool contains(const WebAssemblyException *WE) const {
+    if (WE == this)
+      return true;
+    if (!WE)
+      return false;
+    return contains(WE->getParentException());
+  }
+  bool contains(const MachineBasicBlock *MBB) const {
+    return BlockSet.count(MBB);
+  }
+
+  void addBlock(MachineBasicBlock *MBB) {
+    Blocks.push_back(MBB);
+    BlockSet.insert(MBB);
+  }
+  ArrayRef<MachineBasicBlock *> getBlocks() const { return Blocks; }
+  using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+  block_iterator block_begin() const { return getBlocks().begin(); }
+  block_iterator block_end() const { return getBlocks().end(); }
+  inline iterator_range<block_iterator> blocks() const {
+    return make_range(block_begin(), block_end());
+  }
+  unsigned getNumBlocks() const { return Blocks.size(); }
+  std::vector<MachineBasicBlock *> &getBlocksVector() { return Blocks; }
+
+  const std::vector<WebAssemblyException *> &getSubExceptions() const {
+    return SubExceptions;
+  }
+  std::vector<WebAssemblyException *> &getSubExceptions() {
+    return SubExceptions;
+  }
+  void addSubException(WebAssemblyException *E) { SubExceptions.push_back(E); }
+  using iterator = typename std::vector<WebAssemblyException *>::const_iterator;
+  iterator begin() const { return SubExceptions.begin(); }
+  iterator end() const { return SubExceptions.end(); }
+
+  void reserveBlocks(unsigned Size) { Blocks.reserve(Size); }
+  void reverseBlock(unsigned From = 0) {
+    std::reverse(Blocks.begin() + From, Blocks.end());
+  }
+
+  // Return the nesting level. An outermost one has depth 1.
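+  // For example, an exception nested directly inside a top-level exception
+  // has depth 2.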
+ unsigned getExceptionDepth() const { + unsigned D = 1; + for (const WebAssemblyException *CurException = ParentException; + CurException; CurException = CurException->ParentException) + ++D; + return D; + } + + void print(raw_ostream &OS, unsigned Depth = 0) const; + void dump() const; +}; + +raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE); + +class WebAssemblyExceptionInfo final : public MachineFunctionPass { + // Mapping of basic blocks to the innermost exception they occur in + DenseMap<const MachineBasicBlock *, WebAssemblyException *> BBMap; + std::vector<WebAssemblyException *> TopLevelExceptions; + + void discoverAndMapException(WebAssemblyException *WE, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF); + WebAssemblyException *getOutermostException(MachineBasicBlock *MBB) const; + +public: + static char ID; + WebAssemblyExceptionInfo() : MachineFunctionPass(ID) { + initializeWebAssemblyExceptionInfoPass(*PassRegistry::getPassRegistry()); + } + ~WebAssemblyExceptionInfo() override { releaseMemory(); } + WebAssemblyExceptionInfo(const WebAssemblyExceptionInfo &) = delete; + WebAssemblyExceptionInfo & + operator=(const WebAssemblyExceptionInfo &) = delete; + + bool runOnMachineFunction(MachineFunction &) override; + void releaseMemory() override; + void recalculate(MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF); + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool empty() const { return TopLevelExceptions.empty(); } + + // Return the innermost exception that MBB lives in. If the block is not in an + // exception, null is returned. + WebAssemblyException *getExceptionFor(const MachineBasicBlock *MBB) const { + return BBMap.lookup(MBB); + } + + void changeExceptionFor(MachineBasicBlock *MBB, WebAssemblyException *WE) { + if (!WE) { + BBMap.erase(MBB); + return; + } + BBMap[MBB] = WE; + } + + void addTopLevelException(WebAssemblyException *WE) { + assert(!WE->getParentException() && "Not a top level exception!"); + TopLevelExceptions.push_back(WE); + } + + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp new file mode 100644 index 000000000000..ef75bb215317 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -0,0 +1,399 @@ +//===-- WebAssemblyExplicitLocals.cpp - Make Locals Explicit --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file converts any remaining registers into WebAssembly locals. +/// +/// After register stackification and register coloring, convert non-stackified +/// registers into locals, inserting explicit local.get and local.set +/// instructions. 
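+///
+/// Conceptually, each remaining virtual register becomes a numbered local:
+/// every def of the register is followed by a local.set (or fused into a
+/// local.tee), and every use is preceded by a local.get of that local.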
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-explicit-locals" + +// A command-line option to disable this pass, and keep implicit locals +// for the purpose of testing with lit/llc ONLY. +// This produces output which is not valid WebAssembly, and is not supported +// by assemblers/disassemblers and other MC based tools. +static cl::opt<bool> WasmDisableExplicitLocals( + "wasm-disable-explicit-locals", cl::Hidden, + cl::desc("WebAssembly: output implicit locals in" + " instruction output for test purposes only."), + cl::init(false)); + +namespace { +class WebAssemblyExplicitLocals final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Explicit Locals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyExplicitLocals() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyExplicitLocals::ID = 0; +INITIALIZE_PASS(WebAssemblyExplicitLocals, DEBUG_TYPE, + "Convert registers to WebAssembly locals", false, false) + +FunctionPass *llvm::createWebAssemblyExplicitLocals() { + return new WebAssemblyExplicitLocals(); +} + +/// Return a local id number for the given register, assigning it a new one +/// if it doesn't yet have one. +static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local, + unsigned &CurLocal, unsigned Reg) { + auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal)); + if (P.second) + ++CurLocal; + return P.first->second; +} + +/// Get the appropriate drop opcode for the given register class. +static unsigned getDropOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::DROP_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::DROP_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::DROP_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::DROP_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::DROP_V128; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::DROP_EXNREF; + llvm_unreachable("Unexpected register class"); +} + +/// Get the appropriate local.get opcode for the given register class. 
+static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::LOCAL_GET_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::LOCAL_GET_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::LOCAL_GET_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::LOCAL_GET_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::LOCAL_GET_V128; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_GET_EXNREF; + llvm_unreachable("Unexpected register class"); +} + +/// Get the appropriate local.set opcode for the given register class. +static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::LOCAL_SET_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::LOCAL_SET_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::LOCAL_SET_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::LOCAL_SET_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::LOCAL_SET_V128; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_SET_EXNREF; + llvm_unreachable("Unexpected register class"); +} + +/// Get the appropriate local.tee opcode for the given register class. +static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::LOCAL_TEE_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::LOCAL_TEE_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::LOCAL_TEE_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::LOCAL_TEE_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::LOCAL_TEE_V128; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_TEE_EXNREF; + llvm_unreachable("Unexpected register class"); +} + +/// Get the type associated with the given register class. +static MVT typeForRegClass(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return MVT::i32; + if (RC == &WebAssembly::I64RegClass) + return MVT::i64; + if (RC == &WebAssembly::F32RegClass) + return MVT::f32; + if (RC == &WebAssembly::F64RegClass) + return MVT::f64; + if (RC == &WebAssembly::V128RegClass) + return MVT::v16i8; + if (RC == &WebAssembly::EXNREFRegClass) + return MVT::exnref; + llvm_unreachable("unrecognized register class"); +} + +/// Given a MachineOperand of a stackified vreg, return the instruction at the +/// start of the expression tree. +static MachineInstr *findStartOfTree(MachineOperand &MO, + MachineRegisterInfo &MRI, + WebAssemblyFunctionInfo &MFI) { + Register Reg = MO.getReg(); + assert(MFI.isVRegStackified(Reg)); + MachineInstr *Def = MRI.getVRegDef(Reg); + + // Find the first stackified use and proceed from there. + for (MachineOperand &DefMO : Def->explicit_uses()) { + if (!DefMO.isReg()) + continue; + return findStartOfTree(DefMO, MRI, MFI); + } + + // If there were no stackified uses, we've reached the start. + return Def; +} + +bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Make Locals Explicit **********\n" + "********** Function: " + << MF.getName() << '\n'); + + // Disable this pass if directed to do so. 
+ if (WasmDisableExplicitLocals) + return false; + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + // Map non-stackified virtual registers to their local ids. + DenseMap<unsigned, unsigned> Reg2Local; + + // Handle ARGUMENTS first to ensure that they get the designated numbers. + for (MachineBasicBlock::iterator I = MF.begin()->begin(), + E = MF.begin()->end(); + I != E;) { + MachineInstr &MI = *I++; + if (!WebAssembly::isArgument(MI.getOpcode())) + break; + Register Reg = MI.getOperand(0).getReg(); + assert(!MFI.isVRegStackified(Reg)); + Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm()); + MI.eraseFromParent(); + Changed = true; + } + + // Start assigning local numbers after the last parameter. + unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size()); + + // Precompute the set of registers that are unused, so that we can insert + // drops to their defs. + BitVector UseEmpty(MRI.getNumVirtRegs()); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) + UseEmpty[I] = MRI.use_empty(Register::index2VirtReg(I)); + + // Visit each instruction in the function. + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { + MachineInstr &MI = *I++; + assert(!WebAssembly::isArgument(MI.getOpcode())); + + if (MI.isDebugInstr() || MI.isLabel()) + continue; + + // Replace tee instructions with local.tee. The difference is that tee + // instructions have two defs, while local.tee instructions have one def + // and an index of a local to write to. + if (WebAssembly::isTee(MI.getOpcode())) { + assert(MFI.isVRegStackified(MI.getOperand(0).getReg())); + assert(!MFI.isVRegStackified(MI.getOperand(1).getReg())); + Register OldReg = MI.getOperand(2).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + + // Stackify the input if it isn't stackified yet. + if (!MFI.isVRegStackified(OldReg)) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + Register NewReg = MRI.createVirtualRegister(RC); + unsigned Opc = getLocalGetOpcode(RC); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg) + .addImm(LocalId); + MI.getOperand(2).setReg(NewReg); + MFI.stackifyVReg(NewReg); + } + + // Replace the TEE with a LOCAL_TEE. + unsigned LocalId = + getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg()); + unsigned Opc = getLocalTeeOpcode(RC); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), + MI.getOperand(0).getReg()) + .addImm(LocalId) + .addReg(MI.getOperand(2).getReg()); + + MI.eraseFromParent(); + Changed = true; + continue; + } + + // Insert local.sets for any defs that aren't stackified yet. Currently + // we handle at most one def. 
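+      // Three cases follow: an IMPLICIT_DEF is erased outright, a def with
+      // no uses is popped with a drop, and any other def is written back
+      // with a local.set.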
+ assert(MI.getDesc().getNumDefs() <= 1); + if (MI.getDesc().getNumDefs() == 1) { + Register OldReg = MI.getOperand(0).getReg(); + if (!MFI.isVRegStackified(OldReg)) { + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + Register NewReg = MRI.createVirtualRegister(RC); + auto InsertPt = std::next(MI.getIterator()); + if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) { + MI.eraseFromParent(); + Changed = true; + continue; + } + if (UseEmpty[Register::virtReg2Index(OldReg)]) { + unsigned Opc = getDropOpcode(RC); + MachineInstr *Drop = + BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) + .addReg(NewReg); + // After the drop instruction, this reg operand will not be used + Drop->getOperand(0).setIsKill(); + } else { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned Opc = getLocalSetOpcode(RC); + BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) + .addImm(LocalId) + .addReg(NewReg); + } + MI.getOperand(0).setReg(NewReg); + // This register operand of the original instruction is now being used + // by the inserted drop or local.set instruction, so make it not dead + // yet. + MI.getOperand(0).setIsDead(false); + MFI.stackifyVReg(NewReg); + Changed = true; + } + } + + // Insert local.gets for any uses that aren't stackified yet. + MachineInstr *InsertPt = &MI; + for (MachineOperand &MO : reverse(MI.explicit_uses())) { + if (!MO.isReg()) + continue; + + Register OldReg = MO.getReg(); + + // Inline asm may have a def in the middle of the operands. Our contract + // with inline asm register operands is to provide local indices as + // immediates. + if (MO.isDef()) { + assert(MI.isInlineAsm()); + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + // If this register operand is tied to another operand, we can't + // change it to an immediate. Untie it first. + MI.untieRegOperand(MI.getOperandNo(&MO)); + MO.ChangeToImmediate(LocalId); + continue; + } + + // If we see a stackified register, prepare to insert subsequent + // local.gets before the start of its tree. + if (MFI.isVRegStackified(OldReg)) { + InsertPt = findStartOfTree(MO, MRI, MFI); + continue; + } + + // Our contract with inline asm register operands is to provide local + // indices as immediates. + if (MI.isInlineAsm()) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + // Untie it first if this reg operand is tied to another operand. + MI.untieRegOperand(MI.getOperandNo(&MO)); + MO.ChangeToImmediate(LocalId); + continue; + } + + // Insert a local.get. + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + Register NewReg = MRI.createVirtualRegister(RC); + unsigned Opc = getLocalGetOpcode(RC); + InsertPt = + BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) + .addImm(LocalId); + MO.setReg(NewReg); + MFI.stackifyVReg(NewReg); + Changed = true; + } + + // Coalesce and eliminate COPY instructions. + if (WebAssembly::isCopy(MI.getOpcode())) { + MRI.replaceRegWith(MI.getOperand(1).getReg(), + MI.getOperand(0).getReg()); + MI.eraseFromParent(); + Changed = true; + } + } + } + + // Define the locals. + // TODO: Sort the locals for better compression. 
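+  // Local ids are allocated after the function's parameters, so local id L
+  // corresponds to entry (L - #params) in the locals list.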
+ MFI.setNumLocals(CurLocal - MFI.getParams().size()); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = Register::index2VirtReg(I); + auto RL = Reg2Local.find(Reg); + if (RL == Reg2Local.end() || RL->second < MFI.getParams().size()) + continue; + + MFI.setLocal(RL->second - MFI.getParams().size(), + typeForRegClass(MRI.getRegClass(Reg))); + Changed = true; + } + +#ifndef NDEBUG + // Assert that all registers have been stackified at this point. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (MI.isDebugInstr() || MI.isLabel()) + continue; + for (const MachineOperand &MO : MI.explicit_operands()) { + assert( + (!MO.isReg() || MRI.use_empty(MO.getReg()) || + MFI.isVRegStackified(MO.getReg())) && + "WebAssemblyExplicitLocals failed to stackify a register operand"); + } + } + } +#endif + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp new file mode 100644 index 000000000000..c932f985489a --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -0,0 +1,1401 @@ +//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the WebAssembly-specific support for the FastISel +/// class. Some of the target-specific code is generated by tablegen in the file +/// WebAssemblyGenFastISel.inc, which is #included here. +/// +/// TODO: kill flags +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyTargetMachine.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "wasm-fastisel" + +namespace { + +class WebAssemblyFastISel final : public FastISel { + // All possible address modes. + class Address { + public: + using BaseKind = enum { RegBase, FrameIndexBase }; + + private: + BaseKind Kind = RegBase; + union { + unsigned Reg; + int FI; + } Base; + + int64_t Offset = 0; + + const GlobalValue *GV = nullptr; + + public: + // Innocuous defaults for our address. 
+ Address() { Base.Reg = 0; } + void setKind(BaseKind K) { + assert(!isSet() && "Can't change kind with non-zero base"); + Kind = K; + } + BaseKind getKind() const { return Kind; } + bool isRegBase() const { return Kind == RegBase; } + bool isFIBase() const { return Kind == FrameIndexBase; } + void setReg(unsigned Reg) { + assert(isRegBase() && "Invalid base register access!"); + assert(Base.Reg == 0 && "Overwriting non-zero register"); + Base.Reg = Reg; + } + unsigned getReg() const { + assert(isRegBase() && "Invalid base register access!"); + return Base.Reg; + } + void setFI(unsigned FI) { + assert(isFIBase() && "Invalid base frame index access!"); + assert(Base.FI == 0 && "Overwriting non-zero frame index"); + Base.FI = FI; + } + unsigned getFI() const { + assert(isFIBase() && "Invalid base frame index access!"); + return Base.FI; + } + + void setOffset(int64_t NewOffset) { + assert(NewOffset >= 0 && "Offsets must be non-negative"); + Offset = NewOffset; + } + int64_t getOffset() const { return Offset; } + void setGlobalValue(const GlobalValue *G) { GV = G; } + const GlobalValue *getGlobalValue() const { return GV; } + bool isSet() const { + if (isRegBase()) { + return Base.Reg != 0; + } else { + return Base.FI != 0; + } + } + }; + + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the + /// right decision when generating code for different targets. + const WebAssemblySubtarget *Subtarget; + LLVMContext *Context; + +private: + // Utility helper routines + MVT::SimpleValueType getSimpleType(Type *Ty) { + EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true); + return VT.isSimple() ? VT.getSimpleVT().SimpleTy + : MVT::INVALID_SIMPLE_VALUE_TYPE; + } + MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + return MVT::i32; + case MVT::i32: + case MVT::i64: + case MVT::f32: + case MVT::f64: + case MVT::exnref: + return VT; + case MVT::f16: + return MVT::f32; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + if (Subtarget->hasSIMD128()) + return VT; + break; + case MVT::v2i64: + case MVT::v2f64: + if (Subtarget->hasUnimplementedSIMD128()) + return VT; + break; + default: + break; + } + return MVT::INVALID_SIMPLE_VALUE_TYPE; + } + bool computeAddress(const Value *Obj, Address &Addr); + void materializeLoadStoreOperands(Address &Addr); + void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB, + MachineMemOperand *MMO); + unsigned maskI1Value(unsigned Reg, const Value *V); + unsigned getRegForI1Value(const Value *V, bool &Not); + unsigned zeroExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From); + unsigned signExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From); + unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From, + MVT::SimpleValueType To); + unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From, + MVT::SimpleValueType To); + unsigned getRegForUnsignedValue(const Value *V); + unsigned getRegForSignedValue(const Value *V); + unsigned getRegForPromotedValue(const Value *V, bool IsSigned); + unsigned notValue(unsigned Reg); + unsigned copyValue(unsigned Reg); + + // Backend specific FastISel code. + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; + unsigned fastMaterializeConstant(const Constant *C) override; + bool fastLowerArguments() override; + + // Selection routines. 
+ bool selectCall(const Instruction *I); + bool selectSelect(const Instruction *I); + bool selectTrunc(const Instruction *I); + bool selectZExt(const Instruction *I); + bool selectSExt(const Instruction *I); + bool selectICmp(const Instruction *I); + bool selectFCmp(const Instruction *I); + bool selectBitCast(const Instruction *I); + bool selectLoad(const Instruction *I); + bool selectStore(const Instruction *I); + bool selectBr(const Instruction *I); + bool selectRet(const Instruction *I); + bool selectUnreachable(const Instruction *I); + +public: + // Backend specific FastISel code. + WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) + : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { + Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>(); + Context = &FuncInfo.Fn->getContext(); + } + + bool fastSelectInstruction(const Instruction *I) override; + +#include "WebAssemblyGenFastISel.inc" +}; + +} // end anonymous namespace + +bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + if (const auto *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks unless the object is an alloca from + // another block, otherwise it may not have a virtual register assigned. + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const auto *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + if (const auto *GV = dyn_cast<GlobalValue>(Obj)) { + if (TLI.isPositionIndependent()) + return false; + if (Addr.getGlobalValue()) + return false; + if (GV->isThreadLocal()) + return false; + Addr.setGlobalValue(GV); + return true; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: { + // Look through bitcasts. + return computeAddress(U->getOperand(0), Addr); + } + case Instruction::IntToPtr: { + // Look past no-op inttoptrs. + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) + return computeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::PtrToInt: { + // Look past no-op ptrtoints. + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) + return computeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::GetElementPtr: { + Address SavedAddr = Addr; + uint64_t TmpOffset = Addr.getOffset(); + // Non-inbounds geps can wrap; wasm's offsets can't. + if (!cast<GEPOperator>(U)->isInBounds()) + goto unsupported_gep; + // Iterate through the GEP folding the constants into offsets where + // we can. + for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); + GTI != E; ++GTI) { + const Value *Op = GTI.getOperand(); + if (StructType *STy = GTI.getStructTypeOrNull()) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); + TmpOffset += SL->getElementOffset(Idx); + } else { + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const auto *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. 
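+            // The index is scaled by S, the allocation size of the indexed
+            // element type computed above.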
+ TmpOffset += CI->getSExtValue() * S; + break; + } + if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) { + // An unscaled add of a register. Set it as the new base. + unsigned Reg = getRegForValue(Op); + if (Reg == 0) + return false; + Addr.setReg(Reg); + break; + } + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. + auto *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + TmpOffset += CI->getSExtValue() * S; + // Iterate on the other operand. + Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } + } + } + // Don't fold in negative offsets. + if (int64_t(TmpOffset) >= 0) { + // Try to grab the base operand now. + Addr.setOffset(TmpOffset); + if (computeAddress(U->getOperand(0), Addr)) + return true; + } + // We failed, restore everything and try the other options. + Addr = SavedAddr; + unsupported_gep: + break; + } + case Instruction::Alloca: { + const auto *AI = cast<AllocaInst>(Obj); + DenseMap<const AllocaInst *, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + if (Addr.isSet()) { + return false; + } + Addr.setKind(Address::FrameIndexBase); + Addr.setFI(SI->second); + return true; + } + break; + } + case Instruction::Add: { + // Adds of constants are common and easy enough. + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (isa<ConstantInt>(LHS)) + std::swap(LHS, RHS); + + if (const auto *CI = dyn_cast<ConstantInt>(RHS)) { + uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue(); + if (int64_t(TmpOffset) >= 0) { + Addr.setOffset(TmpOffset); + return computeAddress(LHS, Addr); + } + } + + Address Backup = Addr; + if (computeAddress(LHS, Addr) && computeAddress(RHS, Addr)) + return true; + Addr = Backup; + + break; + } + case Instruction::Sub: { + // Subs of constants are common and easy enough. + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (const auto *CI = dyn_cast<ConstantInt>(RHS)) { + int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue(); + if (TmpOffset >= 0) { + Addr.setOffset(TmpOffset); + return computeAddress(LHS, Addr); + } + } + break; + } + } + if (Addr.isSet()) { + return false; + } + unsigned Reg = getRegForValue(Obj); + if (Reg == 0) + return false; + Addr.setReg(Reg); + return Addr.getReg() != 0; +} + +void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) { + if (Addr.isRegBase()) { + unsigned Reg = Addr.getReg(); + if (Reg == 0) { + Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass + : &WebAssembly::I32RegClass); + unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64 + : WebAssembly::CONST_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg) + .addImm(0); + Addr.setReg(Reg); + } + } +} + +void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr, + const MachineInstrBuilder &MIB, + MachineMemOperand *MMO) { + // Set the alignment operand (this is rewritten in SetP2AlignOperands). + // TODO: Disable SetP2AlignOperands for FastISel and just do it here. 
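+  // WebAssembly loads and stores take a p2align immediate, an offset (an
+  // immediate or a global address), and a base address, in that order,
+  // followed by the memory operand.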
+ MIB.addImm(0); + + if (const GlobalValue *GV = Addr.getGlobalValue()) + MIB.addGlobalAddress(GV, Addr.getOffset()); + else + MIB.addImm(Addr.getOffset()); + + if (Addr.isRegBase()) + MIB.addReg(Addr.getReg()); + else + MIB.addFrameIndex(Addr.getFI()); + + MIB.addMemOperand(MMO); +} + +unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) { + return zeroExtendToI32(Reg, V, MVT::i1); +} + +unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) { + if (const auto *ICmp = dyn_cast<ICmpInst>(V)) + if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1))) + if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) { + Not = ICmp->isTrueWhenEqual(); + return getRegForValue(ICmp->getOperand(0)); + } + + Value *NotV; + if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) { + Not = true; + return getRegForValue(NotV); + } + + Not = false; + unsigned Reg = getRegForValue(V); + if (Reg == 0) + return 0; + return maskI1Value(Reg, V); +} + +unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From) { + if (Reg == 0) + return 0; + + switch (From) { + case MVT::i1: + // If the value is naturally an i1, we don't need to mask it. We only know + // if a value is naturally an i1 if it is definitely lowered by FastISel, + // not a DAG ISel fallback. + if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr()) + return copyValue(Reg); + break; + case MVT::i8: + case MVT::i16: + break; + case MVT::i32: + return copyValue(Reg); + default: + return 0; + } + + unsigned Imm = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::CONST_I32), Imm) + .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits())); + + unsigned Result = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::AND_I32), Result) + .addReg(Reg) + .addReg(Imm); + + return Result; +} + +unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From) { + if (Reg == 0) + return 0; + + switch (From) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + break; + case MVT::i32: + return copyValue(Reg); + default: + return 0; + } + + unsigned Imm = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::CONST_I32), Imm) + .addImm(32 - MVT(From).getSizeInBits()); + + unsigned Left = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::SHL_I32), Left) + .addReg(Reg) + .addReg(Imm); + + unsigned Right = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::SHR_S_I32), Right) + .addReg(Left) + .addReg(Imm); + + return Right; +} + +unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + MVT::SimpleValueType To) { + if (To == MVT::i64) { + if (From == MVT::i64) + return copyValue(Reg); + + Reg = zeroExtendToI32(Reg, V, From); + + unsigned Result = createResultReg(&WebAssembly::I64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I64_EXTEND_U_I32), Result) + .addReg(Reg); + return Result; + } + + if (To == MVT::i32) + return zeroExtendToI32(Reg, V, From); + + return 0; +} + +unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + 
MVT::SimpleValueType To) { + if (To == MVT::i64) { + if (From == MVT::i64) + return copyValue(Reg); + + Reg = signExtendToI32(Reg, V, From); + + unsigned Result = createResultReg(&WebAssembly::I64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I64_EXTEND_S_I32), Result) + .addReg(Reg); + return Result; + } + + if (To == MVT::i32) + return signExtendToI32(Reg, V, From); + + return 0; +} + +unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) { + MVT::SimpleValueType From = getSimpleType(V->getType()); + MVT::SimpleValueType To = getLegalType(From); + unsigned VReg = getRegForValue(V); + if (VReg == 0) + return 0; + return zeroExtend(VReg, V, From, To); +} + +unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) { + MVT::SimpleValueType From = getSimpleType(V->getType()); + MVT::SimpleValueType To = getLegalType(From); + unsigned VReg = getRegForValue(V); + if (VReg == 0) + return 0; + return signExtend(VReg, V, From, To); +} + +unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V, + bool IsSigned) { + return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V); +} + +unsigned WebAssemblyFastISel::notValue(unsigned Reg) { + assert(MRI.getRegClass(Reg) == &WebAssembly::I32RegClass); + + unsigned NotReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::EQZ_I32), NotReg) + .addReg(Reg); + return NotReg; +} + +unsigned WebAssemblyFastISel::copyValue(unsigned Reg) { + unsigned ResultReg = createResultReg(MRI.getRegClass(Reg)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY), + ResultReg) + .addReg(Reg); + return ResultReg; +} + +unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) { + DenseMap<const AllocaInst *, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = + createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass + : &WebAssembly::I32RegClass); + unsigned Opc = + Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addFrameIndex(SI->second); + return ResultReg; + } + + return 0; +} + +unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) { + if (TLI.isPositionIndependent()) + return 0; + if (GV->isThreadLocal()) + return 0; + unsigned ResultReg = + createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass + : &WebAssembly::I32RegClass); + unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64 + : WebAssembly::CONST_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addGlobalAddress(GV); + return ResultReg; + } + + // Let target-independent code handle it. 
+ return 0; +} + +bool WebAssemblyFastISel::fastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + unsigned I = 0; + for (auto const &Arg : F->args()) { + const AttributeList &Attrs = F->getAttributes(); + if (Attrs.hasParamAttribute(I, Attribute::ByVal) || + Attrs.hasParamAttribute(I, Attribute::SwiftSelf) || + Attrs.hasParamAttribute(I, Attribute::SwiftError) || + Attrs.hasParamAttribute(I, Attribute::InAlloca) || + Attrs.hasParamAttribute(I, Attribute::Nest)) + return false; + + Type *ArgTy = Arg.getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy()) + return false; + if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy()) + return false; + + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(ArgTy)) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = WebAssembly::ARGUMENT_i32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::ARGUMENT_i64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::ARGUMENT_f32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::ARGUMENT_f64; + RC = &WebAssembly::F64RegClass; + break; + case MVT::v16i8: + Opc = WebAssembly::ARGUMENT_v16i8; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v8i16: + Opc = WebAssembly::ARGUMENT_v8i16; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4i32: + Opc = WebAssembly::ARGUMENT_v4i32; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v2i64: + Opc = WebAssembly::ARGUMENT_v2i64; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4f32: + Opc = WebAssembly::ARGUMENT_v4f32; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v2f64: + Opc = WebAssembly::ARGUMENT_v2f64; + RC = &WebAssembly::V128RegClass; + break; + case MVT::exnref: + Opc = WebAssembly::ARGUMENT_exnref; + RC = &WebAssembly::EXNREFRegClass; + break; + default: + return false; + } + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addImm(I); + updateValueMap(&Arg, ResultReg); + + ++I; + } + + MRI.addLiveIn(WebAssembly::ARGUMENTS); + + auto *MFI = MF->getInfo<WebAssemblyFunctionInfo>(); + for (auto const &Arg : F->args()) { + MVT::SimpleValueType ArgTy = getLegalType(getSimpleType(Arg.getType())); + if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE) { + MFI->clearParamsAndResults(); + return false; + } + MFI->addParam(ArgTy); + } + + if (!F->getReturnType()->isVoidTy()) { + MVT::SimpleValueType RetTy = + getLegalType(getSimpleType(F->getReturnType())); + if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) { + MFI->clearParamsAndResults(); + return false; + } + MFI->addResult(RetTy); + } + + return true; +} + +bool WebAssemblyFastISel::selectCall(const Instruction *I) { + const auto *Call = cast<CallInst>(I); + + // TODO: Support tail calls in FastISel + if (Call->isMustTailCall() || Call->isInlineAsm() || + Call->getFunctionType()->isVarArg()) + return false; + + Function *Func = Call->getCalledFunction(); + if (Func && Func->isIntrinsic()) + return false; + + bool IsDirect = Func != nullptr; + if (!IsDirect && isa<ConstantExpr>(Call->getCalledValue())) + return false; + + FunctionType *FuncTy = Call->getFunctionType(); + unsigned Opc; + bool IsVoid = FuncTy->getReturnType()->isVoidTy(); + unsigned ResultReg; + if (IsVoid) { + Opc = IsDirect ? 
WebAssembly::CALL_VOID : WebAssembly::PCALL_INDIRECT_VOID; + } else { + if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy()) + return false; + + MVT::SimpleValueType RetTy = getSimpleType(Call->getType()); + switch (RetTy) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = IsDirect ? WebAssembly::CALL_i32 : WebAssembly::PCALL_INDIRECT_i32; + ResultReg = createResultReg(&WebAssembly::I32RegClass); + break; + case MVT::i64: + Opc = IsDirect ? WebAssembly::CALL_i64 : WebAssembly::PCALL_INDIRECT_i64; + ResultReg = createResultReg(&WebAssembly::I64RegClass); + break; + case MVT::f32: + Opc = IsDirect ? WebAssembly::CALL_f32 : WebAssembly::PCALL_INDIRECT_f32; + ResultReg = createResultReg(&WebAssembly::F32RegClass); + break; + case MVT::f64: + Opc = IsDirect ? WebAssembly::CALL_f64 : WebAssembly::PCALL_INDIRECT_f64; + ResultReg = createResultReg(&WebAssembly::F64RegClass); + break; + case MVT::v16i8: + Opc = IsDirect ? WebAssembly::CALL_v16i8 + : WebAssembly::PCALL_INDIRECT_v16i8; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v8i16: + Opc = IsDirect ? WebAssembly::CALL_v8i16 + : WebAssembly::PCALL_INDIRECT_v8i16; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4i32: + Opc = IsDirect ? WebAssembly::CALL_v4i32 + : WebAssembly::PCALL_INDIRECT_v4i32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v2i64: + Opc = IsDirect ? WebAssembly::CALL_v2i64 + : WebAssembly::PCALL_INDIRECT_v2i64; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4f32: + Opc = IsDirect ? WebAssembly::CALL_v4f32 + : WebAssembly::PCALL_INDIRECT_v4f32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v2f64: + Opc = IsDirect ? WebAssembly::CALL_v2f64 + : WebAssembly::PCALL_INDIRECT_v2f64; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::exnref: + Opc = IsDirect ? 
WebAssembly::CALL_exnref + : WebAssembly::PCALL_INDIRECT_exnref; + ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); + break; + default: + return false; + } + } + + SmallVector<unsigned, 8> Args; + for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) { + Value *V = Call->getArgOperand(I); + MVT::SimpleValueType ArgTy = getSimpleType(V->getType()); + if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return false; + + const AttributeList &Attrs = Call->getAttributes(); + if (Attrs.hasParamAttribute(I, Attribute::ByVal) || + Attrs.hasParamAttribute(I, Attribute::SwiftSelf) || + Attrs.hasParamAttribute(I, Attribute::SwiftError) || + Attrs.hasParamAttribute(I, Attribute::InAlloca) || + Attrs.hasParamAttribute(I, Attribute::Nest)) + return false; + + unsigned Reg; + + if (Attrs.hasParamAttribute(I, Attribute::SExt)) + Reg = getRegForSignedValue(V); + else if (Attrs.hasParamAttribute(I, Attribute::ZExt)) + Reg = getRegForUnsignedValue(V); + else + Reg = getRegForValue(V); + + if (Reg == 0) + return false; + + Args.push_back(Reg); + } + + unsigned CalleeReg = 0; + if (!IsDirect) { + CalleeReg = getRegForValue(Call->getCalledValue()); + if (!CalleeReg) + return false; + } + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + + if (!IsVoid) + MIB.addReg(ResultReg, RegState::Define); + + if (IsDirect) + MIB.addGlobalAddress(Func); + else + MIB.addReg(CalleeReg); + + for (unsigned ArgReg : Args) + MIB.addReg(ArgReg); + + if (!IsVoid) + updateValueMap(Call, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectSelect(const Instruction *I) { + const auto *Select = cast<SelectInst>(I); + + bool Not; + unsigned CondReg = getRegForI1Value(Select->getCondition(), Not); + if (CondReg == 0) + return false; + + unsigned TrueReg = getRegForValue(Select->getTrueValue()); + if (TrueReg == 0) + return false; + + unsigned FalseReg = getRegForValue(Select->getFalseValue()); + if (FalseReg == 0) + return false; + + if (Not) + std::swap(TrueReg, FalseReg); + + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(Select->getType())) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = WebAssembly::SELECT_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::SELECT_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::SELECT_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::SELECT_F64; + RC = &WebAssembly::F64RegClass; + break; + case MVT::exnref: + Opc = WebAssembly::SELECT_EXNREF; + RC = &WebAssembly::EXNREFRegClass; + break; + default: + return false; + } + + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(TrueReg) + .addReg(FalseReg) + .addReg(CondReg); + + updateValueMap(Select, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectTrunc(const Instruction *I) { + const auto *Trunc = cast<TruncInst>(I); + + unsigned Reg = getRegForValue(Trunc->getOperand(0)); + if (Reg == 0) + return false; + + if (Trunc->getOperand(0)->getType()->isIntegerTy(64)) { + unsigned Result = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I32_WRAP_I64), Result) + .addReg(Reg); + Reg = Result; + } + + updateValueMap(Trunc, Reg); + return true; +} + +bool WebAssemblyFastISel::selectZExt(const Instruction *I) { + const auto *ZExt = cast<ZExtInst>(I); + + const Value *Op = 
ZExt->getOperand(0); + MVT::SimpleValueType From = getSimpleType(Op->getType()); + MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType())); + unsigned In = getRegForValue(Op); + if (In == 0) + return false; + unsigned Reg = zeroExtend(In, Op, From, To); + if (Reg == 0) + return false; + + updateValueMap(ZExt, Reg); + return true; +} + +bool WebAssemblyFastISel::selectSExt(const Instruction *I) { + const auto *SExt = cast<SExtInst>(I); + + const Value *Op = SExt->getOperand(0); + MVT::SimpleValueType From = getSimpleType(Op->getType()); + MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType())); + unsigned In = getRegForValue(Op); + if (In == 0) + return false; + unsigned Reg = signExtend(In, Op, From, To); + if (Reg == 0) + return false; + + updateValueMap(SExt, Reg); + return true; +} + +bool WebAssemblyFastISel::selectICmp(const Instruction *I) { + const auto *ICmp = cast<ICmpInst>(I); + + bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64; + unsigned Opc; + bool IsSigned = false; + switch (ICmp->getPredicate()) { + case ICmpInst::ICMP_EQ: + Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64; + break; + case ICmpInst::ICMP_NE: + Opc = I32 ? WebAssembly::NE_I32 : WebAssembly::NE_I64; + break; + case ICmpInst::ICMP_UGT: + Opc = I32 ? WebAssembly::GT_U_I32 : WebAssembly::GT_U_I64; + break; + case ICmpInst::ICMP_UGE: + Opc = I32 ? WebAssembly::GE_U_I32 : WebAssembly::GE_U_I64; + break; + case ICmpInst::ICMP_ULT: + Opc = I32 ? WebAssembly::LT_U_I32 : WebAssembly::LT_U_I64; + break; + case ICmpInst::ICMP_ULE: + Opc = I32 ? WebAssembly::LE_U_I32 : WebAssembly::LE_U_I64; + break; + case ICmpInst::ICMP_SGT: + Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64; + IsSigned = true; + break; + case ICmpInst::ICMP_SGE: + Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64; + IsSigned = true; + break; + case ICmpInst::ICMP_SLT: + Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64; + IsSigned = true; + break; + case ICmpInst::ICMP_SLE: + Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64; + IsSigned = true; + break; + default: + return false; + } + + unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), IsSigned); + if (LHS == 0) + return false; + + unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), IsSigned); + if (RHS == 0) + return false; + + unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHS) + .addReg(RHS); + updateValueMap(ICmp, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectFCmp(const Instruction *I) { + const auto *FCmp = cast<FCmpInst>(I); + + unsigned LHS = getRegForValue(FCmp->getOperand(0)); + if (LHS == 0) + return false; + + unsigned RHS = getRegForValue(FCmp->getOperand(1)); + if (RHS == 0) + return false; + + bool F32 = getSimpleType(FCmp->getOperand(0)->getType()) != MVT::f64; + unsigned Opc; + bool Not = false; + switch (FCmp->getPredicate()) { + case FCmpInst::FCMP_OEQ: + Opc = F32 ? WebAssembly::EQ_F32 : WebAssembly::EQ_F64; + break; + case FCmpInst::FCMP_UNE: + Opc = F32 ? WebAssembly::NE_F32 : WebAssembly::NE_F64; + break; + case FCmpInst::FCMP_OGT: + Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64; + break; + case FCmpInst::FCMP_OGE: + Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64; + break; + case FCmpInst::FCMP_OLT: + Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64; + break; + case FCmpInst::FCMP_OLE: + Opc = F32 ? 
WebAssembly::LE_F32 : WebAssembly::LE_F64; + break; + case FCmpInst::FCMP_UGT: + Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64; + Not = true; + break; + case FCmpInst::FCMP_UGE: + Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64; + Not = true; + break; + case FCmpInst::FCMP_ULT: + Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64; + Not = true; + break; + case FCmpInst::FCMP_ULE: + Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64; + Not = true; + break; + default: + return false; + } + + unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHS) + .addReg(RHS); + + if (Not) + ResultReg = notValue(ResultReg); + + updateValueMap(FCmp, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { + // Target-independent code can handle this, except it doesn't set the dead + // flag on the ARGUMENTS clobber, so we have to do that manually in order + // to satisfy code that expects this of isBitcast() instructions. + EVT VT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT RetVT = TLI.getValueType(DL, I->getType()); + if (!VT.isSimple() || !RetVT.isSimple()) + return false; + + unsigned In = getRegForValue(I->getOperand(0)); + if (In == 0) + return false; + + if (VT == RetVT) { + // No-op bitcast. + updateValueMap(I, In); + return true; + } + + Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), + In, I->getOperand(0)->hasOneUse()); + if (!Reg) + return false; + MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; + --Iter; + assert(Iter->isBitcast()); + Iter->setPhysRegsDeadExcept(ArrayRef<Register>(), TRI); + updateValueMap(I, Reg); + return true; +} + +bool WebAssemblyFastISel::selectLoad(const Instruction *I) { + const auto *Load = cast<LoadInst>(I); + if (Load->isAtomic()) + return false; + if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy()) + return false; + + Address Addr; + if (!computeAddress(Load->getPointerOperand(), Addr)) + return false; + + // TODO: Fold a following sign-/zero-extend into the load instruction. 
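+  // i1, i8, and i16 loads are implemented as zero-extending 8- and 16-bit
+  // loads into an i32.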
+ + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(Load->getType())) { + case MVT::i1: + case MVT::i8: + Opc = WebAssembly::LOAD8_U_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i16: + Opc = WebAssembly::LOAD16_U_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i32: + Opc = WebAssembly::LOAD_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::LOAD_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::LOAD_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::LOAD_F64; + RC = &WebAssembly::F64RegClass; + break; + default: + return false; + } + + materializeLoadStoreOperands(Addr); + + unsigned ResultReg = createResultReg(RC); + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Load)); + + updateValueMap(Load, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectStore(const Instruction *I) { + const auto *Store = cast<StoreInst>(I); + if (Store->isAtomic()) + return false; + if (!Subtarget->hasSIMD128() && + Store->getValueOperand()->getType()->isVectorTy()) + return false; + + Address Addr; + if (!computeAddress(Store->getPointerOperand(), Addr)) + return false; + + unsigned Opc; + bool VTIsi1 = false; + switch (getSimpleType(Store->getValueOperand()->getType())) { + case MVT::i1: + VTIsi1 = true; + LLVM_FALLTHROUGH; + case MVT::i8: + Opc = WebAssembly::STORE8_I32; + break; + case MVT::i16: + Opc = WebAssembly::STORE16_I32; + break; + case MVT::i32: + Opc = WebAssembly::STORE_I32; + break; + case MVT::i64: + Opc = WebAssembly::STORE_I64; + break; + case MVT::f32: + Opc = WebAssembly::STORE_F32; + break; + case MVT::f64: + Opc = WebAssembly::STORE_F64; + break; + default: + return false; + } + + materializeLoadStoreOperands(Addr); + + unsigned ValueReg = getRegForValue(Store->getValueOperand()); + if (ValueReg == 0) + return false; + if (VTIsi1) + ValueReg = maskI1Value(ValueReg, Store->getValueOperand()); + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + + addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store)); + + MIB.addReg(ValueReg); + return true; +} + +bool WebAssemblyFastISel::selectBr(const Instruction *I) { + const auto *Br = cast<BranchInst>(I); + if (Br->isUnconditional()) { + MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)]; + fastEmitBranch(MSucc, Br->getDebugLoc()); + return true; + } + + MachineBasicBlock *TBB = FuncInfo.MBBMap[Br->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)]; + + bool Not; + unsigned CondReg = getRegForI1Value(Br->getCondition(), Not); + if (CondReg == 0) + return false; + + unsigned Opc = WebAssembly::BR_IF; + if (Not) + Opc = WebAssembly::BR_UNLESS; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addMBB(TBB) + .addReg(CondReg); + + finishCondBranch(Br->getParent(), TBB, FBB); + return true; +} + +bool WebAssemblyFastISel::selectRet(const Instruction *I) { + if (!FuncInfo.CanLowerReturn) + return false; + + const auto *Ret = cast<ReturnInst>(I); + + if (Ret->getNumOperands() == 0) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::RETURN)); + return true; + } + + // TODO: support multiple return in FastISel + if (Ret->getNumOperands() > 1) + return false; + + Value *RV = Ret->getOperand(0); + if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) + 
return false;
+
+  switch (getSimpleType(RV->getType())) {
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+  case MVT::f32:
+  case MVT::f64:
+  case MVT::v16i8:
+  case MVT::v8i16:
+  case MVT::v4i32:
+  case MVT::v2i64:
+  case MVT::v4f32:
+  case MVT::v2f64:
+  case MVT::exnref:
+    break;
+  default:
+    return false;
+  }
+
+  unsigned Reg;
+  if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt))
+    Reg = getRegForSignedValue(RV);
+  else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt))
+    Reg = getRegForUnsignedValue(RV);
+  else
+    Reg = getRegForValue(RV);
+
+  if (Reg == 0)
+    return false;
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+          TII.get(WebAssembly::RETURN))
+      .addReg(Reg);
+  return true;
+}
+
+bool WebAssemblyFastISel::selectUnreachable(const Instruction *I) {
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+          TII.get(WebAssembly::UNREACHABLE));
+  return true;
+}
+
+bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Call:
+    if (selectCall(I))
+      return true;
+    break;
+  case Instruction::Select:
+    return selectSelect(I);
+  case Instruction::Trunc:
+    return selectTrunc(I);
+  case Instruction::ZExt:
+    return selectZExt(I);
+  case Instruction::SExt:
+    return selectSExt(I);
+  case Instruction::ICmp:
+    return selectICmp(I);
+  case Instruction::FCmp:
+    return selectFCmp(I);
+  case Instruction::BitCast:
+    return selectBitCast(I);
+  case Instruction::Load:
+    return selectLoad(I);
+  case Instruction::Store:
+    return selectStore(I);
+  case Instruction::Br:
+    return selectBr(I);
+  case Instruction::Ret:
+    return selectRet(I);
+  case Instruction::Unreachable:
+    return selectUnreachable(I);
+  default:
+    break;
+  }
+
+  // Fall back to target-independent instruction selection.
+  return selectOperator(I, I->getOpcode());
+}
+
+FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                      const TargetLibraryInfo *LibInfo) {
+  return new WebAssemblyFastISel(FuncInfo, LibInfo);
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
new file mode 100644
index 000000000000..6b1bbd7a2b07
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -0,0 +1,323 @@
+//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Fix bitcasted functions.
+///
+/// WebAssembly requires caller and callee signatures to match; in LLVM,
+/// however, some amount of slop is permitted. Detect mismatches by looking for
+/// bitcasts of functions and rewriting them to use wrapper functions instead.
+///
+/// This doesn't catch all cases, such as when a function's address is taken in
+/// one place and cast in another, but it works for many common cases.
+///
+/// Note that LLVM already optimizes away function bitcasts in common cases by
+/// dropping arguments as needed, so this pass only ends up getting used in
+/// less common cases.
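+///
+/// For example (illustrative):
+///   void foo(int);
+///   ((void (*)(int, int))foo)(1, 2);
+/// is rewritten to call a generated "foo_bitcast" wrapper whose signature
+/// matches the cast; the wrapper drops the extra argument and forwards the
+/// call to foo.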
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fix-function-bitcasts"
+
+namespace {
+class FixFunctionBitcasts final : public ModulePass {
+  StringRef getPassName() const override {
+    return "WebAssembly Fix Function Bitcasts";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  FixFunctionBitcasts() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char FixFunctionBitcasts::ID = 0;
+INITIALIZE_PASS(FixFunctionBitcasts, DEBUG_TYPE,
+                "Fix mismatching bitcasts for WebAssembly", false, false)
+
+ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
+  return new FixFunctionBitcasts();
+}
+
+// Recursively descend the def-use lists from V to find non-bitcast users of
+// bitcasts of V.
+static void findUses(Value *V, Function &F,
+                     SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
+                     SmallPtrSetImpl<Constant *> &ConstantBCs) {
+  for (Use &U : V->uses()) {
+    if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
+      findUses(BC, F, Uses, ConstantBCs);
+    else if (auto *A = dyn_cast<GlobalAlias>(U.getUser()))
+      findUses(A, F, Uses, ConstantBCs);
+    else if (U.get()->getType() != F.getType()) {
+      CallSite CS(U.getUser());
+      if (!CS)
+        // Skip uses that aren't immediately called
+        continue;
+      Value *Callee = CS.getCalledValue();
+      if (Callee != V)
+        // Skip calls where the function isn't the callee
+        continue;
+      if (isa<Constant>(U.get())) {
+        // Only add constant bitcasts to the list once; they get RAUW'd
+        auto C = ConstantBCs.insert(cast<Constant>(U.get()));
+        if (!C.second)
+          continue;
+      }
+      Uses.push_back(std::make_pair(&U, &F));
+    }
+  }
+}
+
+// Create a wrapper function with type Ty that calls F (which may have a
+// different type). Attempt to support common bitcasted function idioms:
+// - Call with more arguments than needed: arguments are dropped
+// - Call with fewer arguments than needed: arguments are filled in with undef
+// - Return value is not needed: drop it
+// - Return value needed but not present: supply an undef
+//
+// If all the argument types are trivially castable to one another (i.e.
+// I32 vs pointer type) then we don't create a wrapper at all (return nullptr
+// instead).
+//
+// If there is a type mismatch that we know would result in an invalid wasm
+// module then generate a wrapper that contains unreachable (i.e. abort at
+// runtime). Such programs are deep into undefined behaviour territory,
+// but we choose to fail at runtime rather than generate an invalid module
+// or fail at compile time. The reason we delay the error is that we want
+// to support CMake, which expects to be able to compile and link programs
+// that refer to functions with entirely incorrect signatures (this is how
+// CMake detects the existence of a function in a toolchain).
+//
+// For bitcasts that involve struct types we don't know at this stage if they
+// would be equivalent at the wasm level and so we can't know if we need to
+// generate a wrapper.
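+//
+// As an illustrative sketch (not emitted verbatim), wrapping a callee of type
+// "void (i32)" to be callable as "void ()" produces roughly:
+//
+//   define private void @callee_bitcast() {
+//   body:
+//     call void @callee(i32 undef)  ; missing argument filled in with undef
+//     ret void
+//   }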
+static Function *createWrapper(Function *F, FunctionType *Ty) { + Module *M = F->getParent(); + + Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage, + F->getName() + "_bitcast", M); + BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); + const DataLayout &DL = BB->getModule()->getDataLayout(); + + // Determine what arguments to pass. + SmallVector<Value *, 4> Args; + Function::arg_iterator AI = Wrapper->arg_begin(); + Function::arg_iterator AE = Wrapper->arg_end(); + FunctionType::param_iterator PI = F->getFunctionType()->param_begin(); + FunctionType::param_iterator PE = F->getFunctionType()->param_end(); + bool TypeMismatch = false; + bool WrapperNeeded = false; + + Type *ExpectedRtnType = F->getFunctionType()->getReturnType(); + Type *RtnType = Ty->getReturnType(); + + if ((F->getFunctionType()->getNumParams() != Ty->getNumParams()) || + (F->getFunctionType()->isVarArg() != Ty->isVarArg()) || + (ExpectedRtnType != RtnType)) + WrapperNeeded = true; + + for (; AI != AE && PI != PE; ++AI, ++PI) { + Type *ArgType = AI->getType(); + Type *ParamType = *PI; + + if (ArgType == ParamType) { + Args.push_back(&*AI); + } else { + if (CastInst::isBitOrNoopPointerCastable(ArgType, ParamType, DL)) { + Instruction *PtrCast = + CastInst::CreateBitOrPointerCast(AI, ParamType, "cast"); + BB->getInstList().push_back(PtrCast); + Args.push_back(PtrCast); + } else if (ArgType->isStructTy() || ParamType->isStructTy()) { + LLVM_DEBUG(dbgs() << "createWrapper: struct param type in bitcast: " + << F->getName() << "\n"); + WrapperNeeded = false; + } else { + LLVM_DEBUG(dbgs() << "createWrapper: arg type mismatch calling: " + << F->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: " + << *ParamType << " Got: " << *ArgType << "\n"); + TypeMismatch = true; + break; + } + } + } + + if (WrapperNeeded && !TypeMismatch) { + for (; PI != PE; ++PI) + Args.push_back(UndefValue::get(*PI)); + if (F->isVarArg()) + for (; AI != AE; ++AI) + Args.push_back(&*AI); + + CallInst *Call = CallInst::Create(F, Args, "", BB); + + Type *ExpectedRtnType = F->getFunctionType()->getReturnType(); + Type *RtnType = Ty->getReturnType(); + // Determine what value to return. + if (RtnType->isVoidTy()) { + ReturnInst::Create(M->getContext(), BB); + } else if (ExpectedRtnType->isVoidTy()) { + LLVM_DEBUG(dbgs() << "Creating dummy return: " << *RtnType << "\n"); + ReturnInst::Create(M->getContext(), UndefValue::get(RtnType), BB); + } else if (RtnType == ExpectedRtnType) { + ReturnInst::Create(M->getContext(), Call, BB); + } else if (CastInst::isBitOrNoopPointerCastable(ExpectedRtnType, RtnType, + DL)) { + Instruction *Cast = + CastInst::CreateBitOrPointerCast(Call, RtnType, "cast"); + BB->getInstList().push_back(Cast); + ReturnInst::Create(M->getContext(), Cast, BB); + } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) { + LLVM_DEBUG(dbgs() << "createWrapper: struct return type in bitcast: " + << F->getName() << "\n"); + WrapperNeeded = false; + } else { + LLVM_DEBUG(dbgs() << "createWrapper: return type mismatch calling: " + << F->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType + << " Got: " << *RtnType << "\n"); + TypeMismatch = true; + } + } + + if (TypeMismatch) { + // Create a new wrapper that simply contains `unreachable`. 
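+    // (This keeps the CMake-style compile-and-link probes described above
+    // succeeding, while trapping at runtime if the mismatched call is ever
+    // actually executed.)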
+    Wrapper->eraseFromParent();
+    Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+                               F->getName() + "_bitcast_invalid", M);
+    BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+    new UnreachableInst(M->getContext(), BB);
+    Wrapper->setName(F->getName() + "_bitcast_invalid");
+  } else if (!WrapperNeeded) {
+    LLVM_DEBUG(dbgs() << "createWrapper: no wrapper needed: " << F->getName()
+                      << "\n");
+    Wrapper->eraseFromParent();
+    return nullptr;
+  }
+  LLVM_DEBUG(dbgs() << "createWrapper: " << F->getName() << "\n");
+  return Wrapper;
+}
+
+// Test whether a main function with type FuncTy should be rewritten to have
+// type MainTy.
+static bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) {
+  // Only fix the main function if it's the standard zero-arg form. That way,
+  // the standard cases will work as expected, and users will see signature
+  // mismatches from the linker for non-standard cases.
+  return FuncTy->getReturnType() == MainTy->getReturnType() &&
+         FuncTy->getNumParams() == 0 &&
+         !FuncTy->isVarArg();
+}
+
+bool FixFunctionBitcasts::runOnModule(Module &M) {
+  LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
+
+  Function *Main = nullptr;
+  CallInst *CallMain = nullptr;
+  SmallVector<std::pair<Use *, Function *>, 0> Uses;
+  SmallPtrSet<Constant *, 2> ConstantBCs;
+
+  // Collect all the places that need wrappers.
+  for (Function &F : M) {
+    findUses(&F, F, Uses, ConstantBCs);
+
+    // If we have a "main" function, and its type isn't
+    // "int main(int argc, char *argv[])", create an artificial call with it
+    // bitcast to that type so that we generate a wrapper for it, which the
+    // C runtime can then call.
+    if (F.getName() == "main") {
+      Main = &F;
+      LLVMContext &C = M.getContext();
+      Type *MainArgTys[] = {Type::getInt32Ty(C),
+                            PointerType::get(Type::getInt8PtrTy(C), 0)};
+      FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
+                                               /*isVarArg=*/false);
+      if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
+        LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
+                          << *F.getFunctionType() << "\n");
+        Value *Args[] = {UndefValue::get(MainArgTys[0]),
+                         UndefValue::get(MainArgTys[1])};
+        Value *Casted =
+            ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
+        CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
+        Use *UseMain = &CallMain->getOperandUse(2);
+        Uses.push_back(std::make_pair(UseMain, &F));
+      }
+    }
+  }
+
+  DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
+
+  for (auto &UseFunc : Uses) {
+    Use *U = UseFunc.first;
+    Function *F = UseFunc.second;
+    auto *PTy = cast<PointerType>(U->get()->getType());
+    auto *Ty = dyn_cast<FunctionType>(PTy->getElementType());
+
+    // If the function is cast to something like i8* as a "generic pointer"
+    // to be later cast to something else, we can't generate a wrapper for it.
+    // Just ignore such casts for now.
+    if (!Ty)
+      continue;
+
+    auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
+    if (Pair.second)
+      Pair.first->second = createWrapper(F, Ty);
+
+    Function *Wrapper = Pair.first->second;
+    if (!Wrapper)
+      continue;
+
+    if (isa<Constant>(U->get()))
+      U->get()->replaceAllUsesWith(Wrapper);
+    else
+      U->set(Wrapper);
+  }
+
+  // If we created a wrapper for main, rename the wrapper so that it's the
+  // one that gets called from startup.
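+  // (Concretely, per the code below: the original "main" becomes
+  // "__original_main", and the generated wrapper takes over the name "main",
+  // along with the original linkage and visibility.)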
+ if (CallMain) { + Main->setName("__original_main"); + auto *MainWrapper = + cast<Function>(CallMain->getCalledValue()->stripPointerCasts()); + delete CallMain; + if (Main->isDeclaration()) { + // The wrapper is not needed in this case as we don't need to export + // it to anyone else. + MainWrapper->eraseFromParent(); + } else { + // Otherwise give the wrapper the same linkage as the original main + // function, so that it can be called from the same places. + MainWrapper->setName("main"); + MainWrapper->setLinkage(Main->getLinkage()); + MainWrapper->setVisibility(Main->getVisibility()); + } + } + + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp new file mode 100644 index 000000000000..157ea9d525c9 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -0,0 +1,502 @@ +//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a pass that removes irreducible control flow. +/// Irreducible control flow means multiple-entry loops, which this pass +/// transforms to have a single entry. +/// +/// Note that LLVM has a generic pass that lowers irreducible control flow, but +/// it linearizes control flow, turning diamonds into two triangles, which is +/// both unnecessary and undesirable for WebAssembly. +/// +/// The big picture: We recursively process each "region", defined as a group +/// of blocks with a single entry and no branches back to that entry. A region +/// may be the entire function body, or the inner part of a loop, i.e., the +/// loop's body without branches back to the loop entry. In each region we fix +/// up multi-entry loops by adding a new block that can dispatch to each of the +/// loop entries, based on the value of a label "helper" variable, and we +/// replace direct branches to the entries with assignments to the label +/// variable and a branch to the dispatch block. Then the dispatch block is the +/// single entry in the loop containing the previous multiple entries. After +/// ensuring all the loops in a region are reducible, we recurse into them. The +/// total time complexity of this pass is: +/// +/// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops + +/// NumLoops * NumLoops) +/// +/// This pass is similar to what the Relooper [1] does. Both identify looping +/// code that requires multiple entries, and resolve it in a similar way (in +/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note +/// also that like the Relooper, we implement a "minimal" intervention: we only +/// use the "label" helper for the blocks we absolutely must and no others. We +/// also prioritize code size and do not duplicate code in order to resolve +/// irreducibility. The graph algorithms for finding loops and entries and so +/// forth are also similar to the Relooper. The main differences between this +/// pass and the Relooper are: +/// +/// * We just care about irreducibility, so we just look at loops. +/// * The Relooper emits structured control flow (with ifs etc.), while we +/// emit a CFG. +/// +/// [1] Alon Zakai. 2011. 
Emscripten: an LLVM-to-JavaScript compiler. In +/// Proceedings of the ACM international conference companion on Object oriented +/// programming systems languages and applications companion (SPLASH '11). ACM, +/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 +/// http://doi.acm.org/10.1145/2048147.2048224 +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-fix-irreducible-control-flow" + +namespace { + +using BlockVector = SmallVector<MachineBasicBlock *, 4>; +using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>; + +// Calculates reachability in a region. Ignores branches to blocks outside of +// the region, and ignores branches to the region entry (for the case where +// the region is the inner part of a loop). +class ReachabilityGraph { +public: + ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks) + : Entry(Entry), Blocks(Blocks) { +#ifndef NDEBUG + // The region must have a single entry. + for (auto *MBB : Blocks) { + if (MBB != Entry) { + for (auto *Pred : MBB->predecessors()) { + assert(inRegion(Pred)); + } + } + } +#endif + calculate(); + } + + bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const { + assert(inRegion(From) && inRegion(To)); + auto I = Reachable.find(From); + if (I == Reachable.end()) + return false; + return I->second.count(To); + } + + // "Loopers" are blocks that are in a loop. We detect these by finding blocks + // that can reach themselves. + const BlockSet &getLoopers() const { return Loopers; } + + // Get all blocks that are loop entries. + const BlockSet &getLoopEntries() const { return LoopEntries; } + + // Get all blocks that enter a particular loop from outside. + const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const { + assert(inRegion(LoopEntry)); + auto I = LoopEnterers.find(LoopEntry); + assert(I != LoopEnterers.end()); + return I->second; + } + +private: + MachineBasicBlock *Entry; + const BlockSet &Blocks; + + BlockSet Loopers, LoopEntries; + DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers; + + bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); } + + // Maps a block to all the other blocks it can reach. + DenseMap<MachineBasicBlock *, BlockSet> Reachable; + + void calculate() { + // Reachability computation work list. Contains pairs of recent additions + // (A, B) where we just added a link A => B. + using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>; + SmallVector<BlockPair, 4> WorkList; + + // Add all relevant direct branches. + for (auto *MBB : Blocks) { + for (auto *Succ : MBB->successors()) { + if (Succ != Entry && inRegion(Succ)) { + Reachable[MBB].insert(Succ); + WorkList.emplace_back(MBB, Succ); + } + } + } + + while (!WorkList.empty()) { + MachineBasicBlock *MBB, *Succ; + std::tie(MBB, Succ) = WorkList.pop_back_val(); + assert(inRegion(MBB) && Succ != Entry && inRegion(Succ)); + if (MBB != Entry) { + // We recently added MBB => Succ, and that means we may have enabled + // Pred => MBB => Succ. + for (auto *Pred : MBB->predecessors()) { + if (Reachable[Pred].insert(Succ).second) { + WorkList.emplace_back(Pred, Succ); + } + } + } + } + + // Blocks that can return to themselves are in a loop. 
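+    // (For instance, in a cycle A -> B -> A, both A and B can reach
+    // themselves, so both are loopers.)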
+    for (auto *MBB : Blocks) {
+      if (canReach(MBB, MBB)) {
+        Loopers.insert(MBB);
+      }
+    }
+    assert(!Loopers.count(Entry));
+
+    // Find the loop entries - loopers reachable from blocks not in that loop -
+    // and those outside blocks that reach them, the "loop enterers".
+    for (auto *Looper : Loopers) {
+      for (auto *Pred : Looper->predecessors()) {
+        // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
+        // otherwise, it is a block that enters into the loop.
+        if (!canReach(Looper, Pred)) {
+          LoopEntries.insert(Looper);
+          LoopEnterers[Looper].insert(Pred);
+        }
+      }
+    }
+  }
+};
+
+// Finds the blocks in a single-entry loop, given the loop entry and the
+// list of blocks that enter the loop.
+class LoopBlocks {
+public:
+  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
+      : Entry(Entry), Enterers(Enterers) {
+    calculate();
+  }
+
+  BlockSet &getBlocks() { return Blocks; }
+
+private:
+  MachineBasicBlock *Entry;
+  const BlockSet &Enterers;
+
+  BlockSet Blocks;
+
+  void calculate() {
+    // Going backwards from the loop entry, if we ignore the blocks entering
+    // from outside, we will traverse all the blocks in the loop.
+    BlockVector WorkList;
+    BlockSet AddedToWorkList;
+    Blocks.insert(Entry);
+    for (auto *Pred : Entry->predecessors()) {
+      if (!Enterers.count(Pred)) {
+        WorkList.push_back(Pred);
+        AddedToWorkList.insert(Pred);
+      }
+    }
+
+    while (!WorkList.empty()) {
+      auto *MBB = WorkList.pop_back_val();
+      assert(!Enterers.count(MBB));
+      if (Blocks.insert(MBB).second) {
+        for (auto *Pred : MBB->predecessors()) {
+          if (!AddedToWorkList.count(Pred)) {
+            WorkList.push_back(Pred);
+            AddedToWorkList.insert(Pred);
+          }
+        }
+      }
+    }
+  }
+};
+
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly Fix Irreducible Control Flow";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
+                     MachineFunction &MF);
+
+  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
+                           MachineFunction &MF, const ReachabilityGraph &Graph);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+
+bool WebAssemblyFixIrreducibleControlFlow::processRegion(
+    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
+  bool Changed = false;
+
+  // Remove irreducibility before processing child loops, which may take
+  // multiple iterations.
+  while (true) {
+    ReachabilityGraph Graph(Entry, Blocks);
+
+    bool FoundIrreducibility = false;
+
+    for (auto *LoopEntry : Graph.getLoopEntries()) {
+      // Find mutual entries - all entries which can reach this one, and
+      // are reached by it (that always includes LoopEntry itself). All mutual
+      // entries must be in the same loop, so if we have more than one, then we
+      // have irreducible control flow.
+      //
+      // Note that irreducibility may involve inner loops, e.g. imagine A
+      // starts one loop, and it has B inside it which starts an inner loop.
+      // If we add a branch from all the way on the outside to B, then in a
+      // sense B is no longer an "inner" loop, semantically speaking. We will
+      // fix that irreducibility by adding a block that dispatches to either
+      // A or B, so B will no longer be an inner loop in our output.
+      // (A fancier approach might try to keep it as such.)
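+      //
+      // (An illustrative minimal case: blocks X and Y branch to each other,
+      // and each is also branched to from outside the loop {X, Y}. X and Y
+      // are then mutual entries, and both get routed through a single new
+      // dispatch block below.)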
+      //
+      // Note that we still need to recurse into inner loops later, to handle
+      // the case where the irreducibility is entirely nested - we would not
+      // be able to identify that at this point, since the enclosing loop is
+      // a group of blocks all of which can reach each other. (We'll see the
+      // irreducibility after removing branches to the top of that enclosing
+      // loop.)
+      BlockSet MutualLoopEntries;
+      MutualLoopEntries.insert(LoopEntry);
+      for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
+        if (OtherLoopEntry != LoopEntry &&
+            Graph.canReach(LoopEntry, OtherLoopEntry) &&
+            Graph.canReach(OtherLoopEntry, LoopEntry)) {
+          MutualLoopEntries.insert(OtherLoopEntry);
+        }
+      }
+
+      if (MutualLoopEntries.size() > 1) {
+        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
+        FoundIrreducibility = true;
+        Changed = true;
+        break;
+      }
+    }
+    // Only go on to actually process the inner loops when we are done
+    // removing irreducible control flow and changing the graph. Modifying
+    // the graph as we go is possible, and that might let us avoid looking at
+    // the already-fixed loops again if we are careful, but all that is
+    // complex and bug-prone. Since irreducible loops are rare, just starting
+    // another iteration is best.
+    if (FoundIrreducibility) {
+      continue;
+    }
+
+    for (auto *LoopEntry : Graph.getLoopEntries()) {
+      LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
+      // Each of these calls to processRegion may change the graph, but are
+      // guaranteed not to interfere with each other. The only changes we make
+      // to the graph are to add blocks on the way to a loop entry. As the
+      // loops are disjoint, that means we may only alter branches that exit
+      // another loop, which are ignored when recursing into that other loop
+      // anyhow.
+      if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
+        Changed = true;
+      }
+    }
+
+    return Changed;
+  }
+}
+
+// Given a set of entries to a single loop, create a single entry for that
+// loop by creating a dispatch block for them, routing control flow using
+// a helper variable. Also updates Blocks with any new blocks created, so
+// that we properly track all the blocks in the region. But this does not
+// update the ReachabilityGraph; the caller recreates it as needed.
+void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
+    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
+    const ReachabilityGraph &Graph) {
+  assert(Entries.size() >= 2);
+
+  // Sort the entries to ensure a deterministic build.
+  BlockVector SortedEntries(Entries.begin(), Entries.end());
+  llvm::sort(SortedEntries,
+             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+               auto ANum = A->getNumber();
+               auto BNum = B->getNumber();
+               return ANum < BNum;
+             });
+
+#ifndef NDEBUG
+  for (auto Block : SortedEntries)
+    assert(Block->getNumber() != -1);
+  if (SortedEntries.size() > 1) {
+    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
+         ++I) {
+      auto ANum = (*I)->getNumber();
+      auto BNum = (*(std::next(I)))->getNumber();
+      assert(ANum != BNum);
+    }
+  }
+#endif
+
+  // Create a dispatch block which will contain a jump table to the entries.
+  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
+  MF.insert(MF.end(), Dispatch);
+  Blocks.insert(Dispatch);
+
+  // Add the jump table.
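+  // (Illustratively, the dispatch block built below holds a single
+  // BR_TABLE_I32 whose index register is set by the routing blocks created
+  // later, with one destination per sorted entry plus a fake default that is
+  // appended at the end.)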
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineInstrBuilder MIB =
+      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
+
+  // Add the register which will be used to tell the jump table which block to
+  // jump to.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  MIB.addReg(Reg);
+
+  // Compute the indices in the superheader, one for each bad block, and
+  // add them as successors.
+  DenseMap<MachineBasicBlock *, unsigned> Indices;
+  for (auto *Entry : SortedEntries) {
+    auto Pair = Indices.insert(std::make_pair(Entry, 0));
+    assert(Pair.second);
+
+    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
+    Pair.first->second = Index;
+
+    MIB.addMBB(Entry);
+    Dispatch->addSuccessor(Entry);
+  }
+
+  // Rewrite the problematic successors for every block that wants to reach
+  // the bad blocks. For simplicity, we just introduce a new block for every
+  // edge we need to rewrite. (Fancier things are possible.)
+
+  BlockVector AllPreds;
+  for (auto *Entry : SortedEntries) {
+    for (auto *Pred : Entry->predecessors()) {
+      if (Pred != Dispatch) {
+        AllPreds.push_back(Pred);
+      }
+    }
+  }
+
+  // This set stores predecessors within this loop.
+  DenseSet<MachineBasicBlock *> InLoop;
+  for (auto *Pred : AllPreds) {
+    for (auto *Entry : Pred->successors()) {
+      if (!Entries.count(Entry))
+        continue;
+      if (Graph.canReach(Entry, Pred)) {
+        InLoop.insert(Pred);
+        break;
+      }
+    }
+  }
+
+  // Record if each entry has a layout predecessor. This map stores
+  // <<Predecessor is within the loop?, loop entry>, layout predecessor>
+  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
+      EntryToLayoutPred;
+  for (auto *Pred : AllPreds)
+    for (auto *Entry : Pred->successors())
+      if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
+        EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
+
+  // We need to create at most two routing blocks per entry: one for
+  // predecessors outside the loop and one for predecessors inside the loop.
+  // This map stores
+  // <<Predecessor is within the loop?, loop entry>, routing block>
+  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
+  for (auto *Pred : AllPreds) {
+    bool PredInLoop = InLoop.count(Pred);
+    for (auto *Entry : Pred->successors()) {
+      if (!Entries.count(Entry) ||
+          Map.count(std::make_pair(InLoop.count(Pred), Entry)))
+        continue;
+      // If this entry has a layout predecessor and the current predecessor
+      // is not it, skip for now; we create the routing block when we visit
+      // that layout predecessor instead, which saves a branch.
+      if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
+          EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
+        continue;
+
+      // This is a successor we need to rewrite.
+      MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
+      MF.insert(Pred->isLayoutSuccessor(Entry)
+                    ? MachineFunction::iterator(Entry)
+                    : MF.end(),
+                Routing);
+      Blocks.insert(Routing);
+
+      // Set the register that tells the jump table the index of the block we
+      // wish to jump to, then jump to the jump table.
+ BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) + .addImm(Indices[Entry]); + BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); + Routing->addSuccessor(Dispatch); + Map[std::make_pair(PredInLoop, Entry)] = Routing; + } + } + + for (auto *Pred : AllPreds) { + bool PredInLoop = InLoop.count(Pred); + // Remap the terminator operands and the successor list. + for (MachineInstr &Term : Pred->terminators()) + for (auto &Op : Term.explicit_uses()) + if (Op.isMBB() && Indices.count(Op.getMBB())) + Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]); + + for (auto *Succ : Pred->successors()) { + if (!Entries.count(Succ)) + continue; + auto *Routing = Map[std::make_pair(PredInLoop, Succ)]; + Pred->replaceSuccessor(Succ, Routing); + } + } + + // Create a fake default label, because br_table requires one. + MIB.addMBB(MIB.getInstr() + ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) + .getMBB()); +} + +} // end anonymous namespace + +char WebAssemblyFixIrreducibleControlFlow::ID = 0; +INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, + "Removes irreducible control flow", false, false) + +FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { + return new WebAssemblyFixIrreducibleControlFlow(); +} + +bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" + "********** Function: " + << MF.getName() << '\n'); + + // Start the recursive process on the entire function body. + BlockSet AllBlocks; + for (auto &MBB : MF) { + AllBlocks.insert(&MBB); + } + + if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { + // We rewrote part of the function; recompute relevant things. + MF.getRegInfo().invalidateLiveness(); + MF.RenumberBlocks(); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp new file mode 100644 index 000000000000..71eeebfada4b --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -0,0 +1,261 @@ +//===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the WebAssembly implementation of +/// TargetFrameLowering class. +/// +/// On WebAssembly, there aren't a lot of things to do here. There are no +/// callee-saved registers to save, and no spill slots. +/// +/// The stack grows downward. 
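+///
+/// (An illustrative sketch of what this file emits for a function with a
+/// fixed-size frame, per emitPrologue/emitEpilogue below: the prolog reads
+/// the __stack_pointer global (GLOBAL_GET_I32), subtracts the frame size
+/// (CONST_I32 + SUB_I32), and writes the result back when required; the
+/// epilog adds the size back (ADD_I32) and restores the global via
+/// GLOBAL_SET_I32.)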
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyFrameLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-frame-info"
+
+// TODO: wasm64
+// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
+
+/// We need a base pointer in the case of having items on the stack that
+/// require stricter alignment than the stack pointer itself. Because we need
+/// to shift the stack pointer by some unknown amount to force the alignment,
+/// we need to record the value of the stack pointer on entry to the function.
+bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const {
+  const auto *RegInfo =
+      MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+  return RegInfo->needsStackRealignment(MF);
+}
+
+/// Return true if the specified function should have a dedicated frame pointer
+/// register.
+bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // When we have var-sized objects, we move the stack pointer by an unknown
+  // amount, and need to emit a frame pointer to restore the stack to where we
+  // were on function entry.
+  // If we already need a base pointer, we use that to fix up the stack pointer.
+  // If there are no fixed-size objects, we would have no use of a frame
+  // pointer, and thus should not emit one.
+  bool HasFixedSizedObjects = MFI.getStackSize() > 0;
+  bool NeedsFixedReference = !hasBP(MF) || HasFixedSizedObjects;
+
+  return MFI.isFrameAddressTaken() ||
+         (MFI.hasVarSizedObjects() && NeedsFixedReference) ||
+         MFI.hasStackMap() || MFI.hasPatchPoint();
+}
+
+/// Under normal circumstances, when a frame pointer is not required, we
+/// reserve argument space for call sites immediately on entry to the current
+/// function. This eliminates the need for add/sub sp brackets around call
+/// sites. Returns true if the call frame is included as part of the stack
+/// frame.
+bool WebAssemblyFrameLowering::hasReservedCallFrame(
+    const MachineFunction &MF) const {
+  return !MF.getFrameInfo().hasVarSizedObjects();
+}
+
+// Returns true if this function needs a local user-space stack pointer for its
+// local frame (not for exception handling).
+bool WebAssemblyFrameLowering::needsSPForLocalFrame(
+    const MachineFunction &MF) const {
+  auto &MFI = MF.getFrameInfo();
+  return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+}
+
+// In functions with EH pads, we need to make a copy of the value of the
+// __stack_pointer global in the SP32 register, in order to use it when
+// restoring __stack_pointer after an exception is caught.
+bool WebAssemblyFrameLowering::needsPrologForEH(
+    const MachineFunction &MF) const {
+  auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
+  return EHType == ExceptionHandling::Wasm &&
+         MF.getFunction().hasPersonalityFn() && MF.getFrameInfo().hasCalls();
+}
+
+/// Returns true if this function needs a local user-space stack pointer.
+/// Unlike a machine stack pointer, the wasm user stack pointer is a global +/// variable, so it is loaded into a register in the prolog. +bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const { + return needsSPForLocalFrame(MF) || needsPrologForEH(MF); +} + +/// Returns true if the local user-space stack pointer needs to be written back +/// to __stack_pointer global by this function (this is not meaningful if +/// needsSP is false). If false, the stack red zone can be used and only a local +/// SP is needed. +bool WebAssemblyFrameLowering::needsSPWriteback( + const MachineFunction &MF) const { + auto &MFI = MF.getFrameInfo(); + assert(needsSP(MF)); + // When we don't need a local stack pointer for its local frame but only to + // support EH, we don't need to write SP back in the epilog, because we don't + // bump down the stack pointer in the prolog. We need to write SP back in the + // epilog only if + // 1. We need SP not only for EH support but also because we actually use + // stack or we have a frame address taken. + // 2. We cannot use the red zone. + bool CanUseRedZone = MFI.getStackSize() <= RedZoneSize && !MFI.hasCalls() && + !MF.getFunction().hasFnAttribute(Attribute::NoRedZone); + return needsSPForLocalFrame(MF) && !CanUseRedZone; +} + +void WebAssemblyFrameLowering::writeSPToGlobal( + unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32)) + .addExternalSymbol(SPSymbol) + .addReg(SrcReg); +} + +MachineBasicBlock::iterator +WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && + "Call frame pseudos should only be used for dynamic stack adjustment"); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && + needsSPWriteback(MF)) { + DebugLoc DL = I->getDebugLoc(); + writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL); + } + return MBB.erase(I); +} + +void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions + auto &MFI = MF.getFrameInfo(); + assert(MFI.getCalleeSavedInfo().empty() && + "WebAssembly should not have callee-saved registers"); + + if (!needsSP(MF)) + return; + uint64_t StackSize = MFI.getStackSize(); + + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + auto InsertPt = MBB.begin(); + while (InsertPt != MBB.end() && + WebAssembly::isArgument(InsertPt->getOpcode())) + ++InsertPt; + DebugLoc DL; + + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned SPReg = WebAssembly::SP32; + if (StackSize) + SPReg = MRI.createVirtualRegister(PtrRC); + + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg) + .addExternalSymbol(SPSymbol); + + bool HasBP = hasBP(MF); + if (HasBP) { + auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); + Register BasePtr = MRI.createVirtualRegister(PtrRC); + 
FI->setBasePointerVreg(BasePtr); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr) + .addReg(SPReg); + } + if (StackSize) { + // Subtract the frame size + Register OffsetReg = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), + WebAssembly::SP32) + .addReg(SPReg) + .addReg(OffsetReg); + } + if (HasBP) { + Register BitmaskReg = MRI.createVirtualRegister(PtrRC); + unsigned Alignment = MFI.getMaxAlignment(); + assert((1u << countTrailingZeros(Alignment)) == Alignment && + "Alignment must be a power of 2"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg) + .addImm((int)~(Alignment - 1)); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), + WebAssembly::SP32) + .addReg(WebAssembly::SP32) + .addReg(BitmaskReg); + } + if (hasFP(MF)) { + // Unlike most conventional targets (where FP points to the saved FP), + // FP points to the bottom of the fixed-size locals, so we can use positive + // offsets in load/store instructions. + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) + .addReg(WebAssembly::SP32); + } + if (StackSize && needsSPWriteback(MF)) { + writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL); + } +} + +void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + uint64_t StackSize = MF.getFrameInfo().getStackSize(); + if (!needsSP(MF) || !needsSPWriteback(MF)) + return; + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + auto InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + + if (InsertPt != MBB.end()) + DL = InsertPt->getDebugLoc(); + + // Restore the stack pointer. If we had fixed-size locals, add the offset + // subtracted in the prolog. + unsigned SPReg = 0; + if (hasBP(MF)) { + auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); + SPReg = FI->getBasePointerVreg(); + } else if (StackSize) { + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + Register OffsetReg = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + // In the epilog we don't need to write the result back to the SP32 physreg + // because it won't be used again. We can use a stackified register instead. + SPReg = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) + .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) + .addReg(OffsetReg); + } else { + SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; + } + + writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h new file mode 100644 index 000000000000..fdc0f561dcd9 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -0,0 +1,65 @@ +// WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This class implements WebAssembly-specific bits of +/// TargetFrameLowering class. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+class MachineFrameInfo;
+
+class WebAssemblyFrameLowering final : public TargetFrameLowering {
+public:
+  /// Size of the red zone for the user stack (leaf functions can use this much
+  /// space below the stack pointer without writing it back to the
+  /// __stack_pointer global).
+  // TODO: (ABI) Revisit and decide how large it should be.
+  static const size_t RedZoneSize = 128;
+
+  WebAssemblyFrameLowering()
+      : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/Align(16),
+                            /*LocalAreaOffset=*/0,
+                            /*TransientStackAlignment=*/Align(16),
+                            /*StackRealignable=*/true) {}
+
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
+
+  /// These methods insert prolog and epilog code into the function.
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+  bool hasFP(const MachineFunction &MF) const override;
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+  bool needsPrologForEH(const MachineFunction &MF) const;
+
+  /// Write SP back to __stack_pointer global.
+  void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF,
+                       MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator &InsertStore,
+                       const DebugLoc &DL) const;
+
+private:
+  bool hasBP(const MachineFunction &MF) const;
+  bool needsSPForLocalFrame(const MachineFunction &MF) const;
+  bool needsSP(const MachineFunction &MF) const;
+  bool needsSPWriteback(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
new file mode 100644
index 000000000000..13f0476eb4a5
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -0,0 +1,38 @@
+//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes the various WebAssembly ISD node types.
+///
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+HANDLE_NODETYPE(CALL1)
+HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RET_CALL)
+HANDLE_NODETYPE(RETURN)
+HANDLE_NODETYPE(ARGUMENT)
+// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
+HANDLE_NODETYPE(Wrapper)
+// A special wrapper used in PIC code for __memory_base/__table_base relative
+// access.
+HANDLE_NODETYPE(WrapperPIC)
+HANDLE_NODETYPE(BR_IF)
+HANDLE_NODETYPE(BR_TABLE)
+HANDLE_NODETYPE(SHUFFLE)
+HANDLE_NODETYPE(SWIZZLE)
+HANDLE_NODETYPE(VEC_SHL)
+HANDLE_NODETYPE(VEC_SHR_S)
+HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(LOAD_SPLAT)
+HANDLE_NODETYPE(THROW)
+HANDLE_NODETYPE(MEMORY_COPY)
+HANDLE_NODETYPE(MEMORY_FILL)
+
+// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
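+
+// For illustration (a typical consumer sketch, not part of this file):
+// clients define HANDLE_NODETYPE and include this file, X-macro style:
+//
+//   enum NodeType : unsigned {
+//     FIRST_NUMBER = ISD::BUILTIN_OP_END,
+//   #define HANDLE_NODETYPE(NODE) NODE,
+//   #include "WebAssemblyISD.def"
+//   #undef HANDLE_NODETYPE
+//   };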
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
new file mode 100644
index 000000000000..f83a8a984ae0
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -0,0 +1,241 @@
+//- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines an instruction selector for the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h" // To access function attributes.
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-isel"
+
+//===--------------------------------------------------------------------===//
+/// WebAssembly-specific code to select WebAssembly machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
+  /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const WebAssemblySubtarget *Subtarget;
+
+  bool ForCodeSize;
+
+public:
+  WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
+                          CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+  }
+
+  StringRef getPassName() const override {
+    return "WebAssembly Instruction Selection";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
+                         "********** Function: "
+                      << MF.getName() << '\n');
+
+    ForCodeSize = MF.getFunction().hasOptSize();
+    Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+
+    // Wasm64 is not fully supported right now (and is not specified)
+    if (Subtarget->hasAddr64())
+      report_fatal_error(
+          "64-bit WebAssembly (wasm64) is not currently supported");
+
+    return SelectionDAGISel::runOnMachineFunction(MF);
+  }
+
+  void Select(SDNode *Node) override;
+
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "WebAssemblyGenDAGISel.inc"
+
+private:
+  // add select functions here...
+};
+} // end anonymous namespace
+
+void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
+  // If we have a custom node, it has already been selected!
+  if (Node->isMachineOpcode()) {
+    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+    Node->setNodeId(-1);
+    return;
+  }
+
+  // Handle the few cases that need custom selection.
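+  // (The cases below are ATOMIC_FENCE, TLS global addresses, and the TLS
+  // intrinsics; everything else falls through to the generated matcher via
+  // SelectCode at the end.)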
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
+  switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    MachineSDNode *Fence = nullptr;
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread:
+      // We lower a single-thread fence to a pseudo compiler barrier
+      // instruction that prevents instruction reordering. It will not be
+      // emitted in the final binary.
+      Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                     DL,                 // debug loc
+                                     MVT::Other,         // outchain type
+                                     Node->getOperand(0) // inchain
+      );
+      break;
+    case SyncScope::System:
+      // Currently wasm only supports sequentially consistent atomics, so we
+      // always set the order to 0 (sequentially consistent).
+      Fence = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_FENCE,
+          DL,                                         // debug loc
+          MVT::Other,                                 // outchain type
+          CurDAG->getTargetConstant(0, DL, MVT::i32), // order
+          Node->getOperand(0)                         // inchain
+      );
+      break;
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+
+    ReplaceNode(Node, Fence);
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+
+  case ISD::GlobalTLSAddress: {
+    const auto *GA = cast<GlobalAddressSDNode>(Node);
+
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+      report_fatal_error("cannot use thread-local storage without bulk memory",
+                         false);
+
+    // Currently Emscripten does not support dynamic linking with threads.
+    // Therefore, if we have thread-local storage, only the local-exec model
+    // is possible.
+    // TODO: remove this and implement proper TLS models once Emscripten
+    // supports dynamic linking with threads.
+    if (GA->getGlobal()->getThreadLocalMode() !=
+            GlobalValue::LocalExecTLSModel &&
+        !Subtarget->getTargetTriple().isOSEmscripten()) {
+      report_fatal_error("only -ftls-model=local-exec is supported for now on "
+                         "non-Emscripten OSes: variable " +
+                             GA->getGlobal()->getName(),
+                         false);
+    }
+
+    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+    SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
+    SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
+        GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
+
+    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
+                                                    DL, MVT::i32, TLSBaseSym);
+    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
+        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
+    MachineSDNode *TLSAddress =
+        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
+                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    ReplaceNode(Node, TLSAddress);
+    return;
+  }
+
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::wasm_tls_size: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSSize = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+      ReplaceNode(Node, TLSSize);
+      return;
+    }
+    case Intrinsic::wasm_tls_align: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSAlign = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32));
+      ReplaceNode(Node, TLSAlign);
+      return;
+    }
+    }
+    break;
+  }
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::wasm_tls_base: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSBase = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other,
+          CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
+          Node->getOperand(0));
+      ReplaceNode(Node, TLSBase);
+      return;
+    }
+    }
+    break;
+  }
+
+  default:
+    break;
+  }
+
+  // Select the default instruction.
+  SelectCode(Node);
+}
+
+bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+  switch (ConstraintID) {
+  case InlineAsm::Constraint_i:
+  case InlineAsm::Constraint_m:
+    // We only support simple memory operands, which have a single address
+    // operand and need no special handling.
+    OutOps.push_back(Op);
+    return false;
+  default:
+    break;
+  }
+
+  return true;
+}
+
+/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
+/// for instruction scheduling.
+FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
+                                             CodeGenOpt::Level OptLevel) {
+  return new WebAssemblyDAGToDAGISel(TM, OptLevel);
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
new file mode 100644
index 000000000000..f06afdbcea9e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -0,0 +1,1553 @@
+//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the WebAssemblyTargetLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyISelLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower"
+
+WebAssemblyTargetLowering::WebAssemblyTargetLowering(
+    const TargetMachine &TM, const WebAssemblySubtarget &STI)
+    : TargetLowering(TM), Subtarget(&STI) {
+  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+  // Booleans always contain 0 or 1.
+  setBooleanContents(ZeroOrOneBooleanContent);
+  // Except in SIMD vectors
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+  // We don't know the microarchitecture here, so just reduce register
+  // pressure.
+  setSchedulingPreference(Sched::RegPressure);
+  // Tell ISel that we have a stack pointer.
+  setStackPointerRegisterToSaveRestore(
+      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
+  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
+  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
+  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+  if (Subtarget->hasSIMD128()) {
+    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
+  }
+  if (Subtarget->hasUnimplementedSIMD128()) {
+    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
+  }
+  // Compute derived properties from the register classes.
+  computeRegisterProperties(Subtarget->getRegisterInfo());
+
+  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
+  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
+  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
+  setOperationAction(ISD::BRIND, MVT::Other, Custom);
+
+  // Take the default expansion for va_arg, va_copy, and va_end. There is no
+  // default action for va_start, so we handle that with custom lowering.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
+    // Don't expand the floating-point types to constant pools.
+    setOperationAction(ISD::ConstantFP, T, Legal);
+    // Expand floating-point comparisons.
+    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
+                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
+      setCondCodeAction(CC, T, Expand);
+    // Expand floating-point library function operators.
+    for (auto Op :
+         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
+      setOperationAction(Op, T, Expand);
+    // Note supported floating-point library function operators that otherwise
+    // default to expand.
+    for (auto Op :
+         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+      setOperationAction(Op, T, Legal);
+    // Support minimum and maximum, which otherwise default to expand.
+    setOperationAction(ISD::FMINIMUM, T, Legal);
+    setOperationAction(ISD::FMAXIMUM, T, Legal);
+    // WebAssembly currently has no builtin f16 support.
+    setOperationAction(ISD::FP16_TO_FP, T, Expand);
+    setOperationAction(ISD::FP_TO_FP16, T, Expand);
+    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
+    setTruncStoreAction(T, MVT::f16, Expand);
+  }
+
+  // Expand unavailable integer operations.
+ for (auto Op : + {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, + ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, + ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { + for (auto T : {MVT::i32, MVT::i64}) + setOperationAction(Op, T, Expand); + if (Subtarget->hasSIMD128()) + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(Op, T, Expand); + if (Subtarget->hasUnimplementedSIMD128()) + setOperationAction(Op, MVT::v2i64, Expand); + } + + // SIMD-specific configuration + if (Subtarget->hasSIMD128()) { + // Support saturating add for i8x16 and i16x8 + for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) + for (auto T : {MVT::v16i8, MVT::v8i16}) + setOperationAction(Op, T, Legal); + + // Custom lower BUILD_VECTORs to minimize number of replace_lanes + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + if (Subtarget->hasUnimplementedSIMD128()) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + + // We have custom shuffle lowering to expose the shuffle mask + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); + if (Subtarget->hasUnimplementedSIMD128()) + for (auto T: {MVT::v2i64, MVT::v2f64}) + setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); + + // Custom lowering since wasm shifts must have a scalar shift amount + for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(Op, T, Custom); + if (Subtarget->hasUnimplementedSIMD128()) + setOperationAction(Op, MVT::v2i64, Custom); + } + + // Custom lower lane accesses to expand out variable indices + for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + setOperationAction(Op, T, Custom); + if (Subtarget->hasUnimplementedSIMD128()) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(Op, T, Custom); + } + + // There is no i64x2.mul instruction + setOperationAction(ISD::MUL, MVT::v2i64, Expand); + + // There are no vector select instructions + for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + setOperationAction(Op, T, Expand); + if (Subtarget->hasUnimplementedSIMD128()) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(Op, T, Expand); + } + + // Expand integer operations supported for scalars but not SIMD + for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) + setOperationAction(Op, T, Expand); + if (Subtarget->hasUnimplementedSIMD128()) + setOperationAction(Op, MVT::v2i64, Expand); + } + + // Expand float operations supported for scalars but not SIMD + for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, + ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, + ISD::FEXP, ISD::FEXP2, ISD::FRINT}) { + setOperationAction(Op, MVT::v4f32, Expand); + if (Subtarget->hasUnimplementedSIMD128()) + setOperationAction(Op, MVT::v2f64, Expand); + } + + // Expand additional SIMD ops that V8 hasn't implemented yet + if (!Subtarget->hasUnimplementedSIMD128()) { + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + } + } + + // As a special case, these operators use the type to mean the type 
to + // sign-extend from. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + if (!Subtarget->hasSignExt()) { + // Sign extends are legal only when extending a vector extract + auto Action = Subtarget->hasSIMD128() ? Custom : Expand; + for (auto T : {MVT::i8, MVT::i16, MVT::i32}) + setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); + } + for (auto T : MVT::integer_fixedlen_vector_valuetypes()) + setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); + + // Dynamic stack allocation: use the default expansion. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); + + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::CopyToReg, MVT::Other, Custom); + + // Expand these forms; we pattern-match the forms that we can handle in isel. + for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) + setOperationAction(Op, T, Expand); + + // We have custom switch handling. + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // WebAssembly doesn't have: + // - Floating-point extending loads. + // - Floating-point truncating stores. + // - i1 extending loads. + // - truncating SIMD stores and most extending loads + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + for (auto T : MVT::integer_valuetypes()) + for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) + setLoadExtAction(Ext, T, MVT::i1, Promote); + if (Subtarget->hasSIMD128()) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, + MVT::v2f64}) { + for (auto MemT : MVT::fixedlen_vector_valuetypes()) { + if (MVT(T) != MemT) { + setTruncStoreAction(T, MemT, Expand); + for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) + setLoadExtAction(Ext, T, MemT, Expand); + } + } + } + // But some vector extending loads are legal + if (Subtarget->hasUnimplementedSIMD128()) { + for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); + } + } + } + + // Don't do anything clever with build_pairs + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // Trap lowers to wasm unreachable + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // Exception handling intrinsics + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + + setMaxAtomicSizeInBitsSupported(64); + + // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is + // consistent with the f64 and f128 names. + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + + // Define the emscripten name for return address helper. + // TODO: when implementing other WASM backends, make this generic or only do + // this on emscripten depending on what they end up doing. + setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address"); + + // Always convert switches to br_tables unless there is only one case, which + // is equivalent to a simple branch. This reduces code size for wasm, and we + // defer possible jump table optimizations to the VM. 
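+  // (With a minimum of two entries, even a two-case switch becomes a single
+  // br_table rather than a compare-and-branch chain.)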
+ setMinimumJumpTableEntries(2); +} + +TargetLowering::AtomicExpansionKind +WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // We have wasm instructions for these + switch (AI->getOperation()) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + case AtomicRMWInst::Xchg: + return AtomicExpansionKind::None; + default: + break; + } + return AtomicExpansionKind::CmpXChg; +} + +FastISel *WebAssemblyTargetLowering::createFastISel( + FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { + return WebAssembly::createFastISel(FuncInfo, LibInfo); +} + +MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, + EVT VT) const { + unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); + if (BitWidth > 1 && BitWidth < 8) + BitWidth = 8; + + if (BitWidth > 64) { + // The shift will be lowered to a libcall, and compiler-rt libcalls expect + // the count to be an i32. + BitWidth = 32; + assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && + "32-bit shift counts ought to be enough for anyone"); + } + + MVT Result = MVT::getIntegerVT(BitWidth); + assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Unable to represent scalar shift amount type"); + return Result; +} + +// Lower an fp-to-int conversion operator from the LLVM opcode, which has an +// undefined result on invalid/overflow, to the WebAssembly opcode, which +// traps on invalid/overflow. +static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *BB, + const TargetInstrInfo &TII, + bool IsUnsigned, bool Int64, + bool Float64, unsigned LoweredOpcode) { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); + + unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; + unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; + unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; + unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; + unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; + unsigned Eqz = WebAssembly::EQZ_I32; + unsigned And = WebAssembly::AND_I32; + int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; + int64_t Substitute = IsUnsigned ? 0 : Limit; + double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; + auto &Context = BB->getParent()->getFunction().getContext(); + Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); + + const BasicBlock *LLVMBB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB); + MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB); + MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB); + + MachineFunction::iterator It = ++BB->getIterator(); + F->insert(It, FalseMBB); + F->insert(It, TrueMBB); + F->insert(It, DoneMBB); + + // Transfer the remainder of BB and its successor edges to DoneMBB. 
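+  // (Instructions after MI move to DoneMBB, and BB's former successors become
+  // DoneMBB's successors, with their PHIs updated to match.)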
+ DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end()); + DoneMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(TrueMBB); + BB->addSuccessor(FalseMBB); + TrueMBB->addSuccessor(DoneMBB); + FalseMBB->addSuccessor(DoneMBB); + + unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; + Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + + MI.eraseFromParent(); + // For signed numbers, we can do a single comparison to determine whether + // fabs(x) is within range. + if (IsUnsigned) { + Tmp0 = InReg; + } else { + BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); + } + BuildMI(BB, DL, TII.get(FConst), Tmp1) + .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal))); + BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); + + // For unsigned numbers, we have to do a separate comparison with zero. + if (IsUnsigned) { + Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + Register SecondCmpReg = + MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(BB, DL, TII.get(FConst), Tmp1) + .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0))); + BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); + BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); + CmpReg = AndReg; + } + + BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); + + // Create the CFG diamond to select between doing the conversion or using + // the substitute value. 
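+  //
+  //             BB (range check)
+  //            /               \
+  //      FalseMBB            TrueMBB
+  //   (perform trunc)   (use Substitute)
+  //            \               /
+  //           DoneMBB (phi merges the two results)
+  //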
+ BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg); + BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg); + BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB); + BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute); + BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) + .addReg(FalseReg) + .addMBB(FalseMBB) + .addReg(TrueReg) + .addMBB(TrueMBB); + + return DoneMBB; +} + +MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case WebAssembly::FP_TO_SINT_I32_F32: + return LowerFPToInt(MI, DL, BB, TII, false, false, false, + WebAssembly::I32_TRUNC_S_F32); + case WebAssembly::FP_TO_UINT_I32_F32: + return LowerFPToInt(MI, DL, BB, TII, true, false, false, + WebAssembly::I32_TRUNC_U_F32); + case WebAssembly::FP_TO_SINT_I64_F32: + return LowerFPToInt(MI, DL, BB, TII, false, true, false, + WebAssembly::I64_TRUNC_S_F32); + case WebAssembly::FP_TO_UINT_I64_F32: + return LowerFPToInt(MI, DL, BB, TII, true, true, false, + WebAssembly::I64_TRUNC_U_F32); + case WebAssembly::FP_TO_SINT_I32_F64: + return LowerFPToInt(MI, DL, BB, TII, false, false, true, + WebAssembly::I32_TRUNC_S_F64); + case WebAssembly::FP_TO_UINT_I32_F64: + return LowerFPToInt(MI, DL, BB, TII, true, false, true, + WebAssembly::I32_TRUNC_U_F64); + case WebAssembly::FP_TO_SINT_I64_F64: + return LowerFPToInt(MI, DL, BB, TII, false, true, true, + WebAssembly::I64_TRUNC_S_F64); + case WebAssembly::FP_TO_UINT_I64_F64: + return LowerFPToInt(MI, DL, BB, TII, true, true, true, + WebAssembly::I64_TRUNC_U_F64); + llvm_unreachable("Unexpected instruction to emit with custom inserter"); + } +} + +const char * +WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { + case WebAssemblyISD::FIRST_NUMBER: + break; +#define HANDLE_NODETYPE(NODE) \ + case WebAssemblyISD::NODE: \ + return "WebAssemblyISD::" #NODE; +#include "WebAssemblyISD.def" +#undef HANDLE_NODETYPE + } + return nullptr; +} + +std::pair<unsigned, const TargetRegisterClass *> +WebAssemblyTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + // First, see if this is a constraint that directly corresponds to a + // WebAssembly register class. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (Subtarget->hasSIMD128() && VT.isVector()) { + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &WebAssembly::V128RegClass); + } + if (VT.isInteger() && !VT.isVector()) { + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &WebAssembly::I32RegClass); + if (VT.getSizeInBits() <= 64) + return std::make_pair(0U, &WebAssembly::I64RegClass); + } + break; + default: + break; + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { + // Assume ctz is a relatively cheap operation. + return true; +} + +bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { + // Assume clz is a relatively cheap operation. 
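+  // (wasm provides i32.clz / i64.clz as single instructions, so no libcall is
+  // needed.)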
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                      const AddrMode &AM,
+                                                      Type *Ty, unsigned AS,
+                                                      Instruction *I) const {
+  // WebAssembly offsets are added as unsigned without wrapping. The
+  // isLegalAddressingMode hook gives us no way to determine whether wrapping
+  // could occur, so we approximate this by accepting only non-negative
+  // offsets.
+  if (AM.BaseOffs < 0)
+    return false;
+
+  // WebAssembly has no scale register operands.
+  if (AM.Scale != 0)
+    return false;
+
+  // Everything else is legal.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
+    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
+  // WebAssembly supports unaligned accesses, though loads and stores that
+  // perform them should declare so with the p2align attribute, and there may
+  // be a performance impact. We tell LLVM they're "fast" because for the
+  // kinds of things that LLVM uses this for (merging adjacent stores of
+  // constants, etc.), WebAssembly implementations will either want the
+  // unaligned access or they'll split anyway.
+  if (Fast)
+    *Fast = true;
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
+                                              AttributeList Attr) const {
+  // The current thinking is that wasm engines will perform this optimization,
+  // so we can save on code size.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+  if (!Subtarget->hasUnimplementedSIMD128())
+    return false;
+  MVT ExtT = ExtVal.getSimpleValueType();
+  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
+  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
+         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
+         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
+}
+
+EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
+                                                  LLVMContext &C,
+                                                  EVT VT) const {
+  if (VT.isVector())
+    return VT.changeVectorElementTypeToInteger();
+
+  return TargetLowering::getSetCCResultType(DL, C, VT);
+}
+
+bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                                   const CallInst &I,
+                                                   MachineFunction &MF,
+                                                   unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  case Intrinsic::wasm_atomic_notify:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = Align(4);
+    // The atomic.notify instruction does not really load the memory specified
+    // with this argument, but a MachineMemOperand must be either a load or a
+    // store, so we set this to a load.
+    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
+    // instructions are treated as volatile in the backend, so we should be
+    // consistent. The same applies to the wasm_atomic_wait intrinsics too.
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; + return true; + case Intrinsic::wasm_atomic_wait_i32: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Align(4); + Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; + return true; + case Intrinsic::wasm_atomic_wait_i64: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Align(8); + Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; + return true; + default: + return false; + } +} + +//===----------------------------------------------------------------------===// +// WebAssembly Lowering private implementation. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { + MachineFunction &MF = DAG.getMachineFunction(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); +} + +// Test whether the given calling convention is supported. +static bool callingConvSupported(CallingConv::ID CallConv) { + // We currently support the language-independent target-independent + // conventions. We don't yet have a way to annotate calls with properties like + // "cold", and we don't have any call-clobbered registers, so these are mostly + // all handled the same. + return CallConv == CallingConv::C || CallConv == CallingConv::Fast || + CallConv == CallingConv::Cold || + CallConv == CallingConv::PreserveMost || + CallConv == CallingConv::PreserveAll || + CallConv == CallingConv::CXX_FAST_TLS || + CallConv == CallingConv::WASM_EmscriptenInvoke; +} + +SDValue +WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + MachineFunction &MF = DAG.getMachineFunction(); + auto Layout = MF.getDataLayout(); + + CallingConv::ID CallConv = CLI.CallConv; + if (!callingConvSupported(CallConv)) + fail(DL, DAG, + "WebAssembly doesn't support language-specific or target-specific " + "calling conventions yet"); + if (CLI.IsPatchPoint) + fail(DL, DAG, "WebAssembly doesn't support patch point yet"); + + if (CLI.IsTailCall) { + bool MustTail = CLI.CS && CLI.CS.isMustTailCall(); + if (Subtarget->hasTailCall() && !CLI.IsVarArg) { + // Do not tail call unless caller and callee return types match + const Function &F = MF.getFunction(); + const TargetMachine &TM = getTargetMachine(); + Type *RetTy = F.getReturnType(); + SmallVector<MVT, 4> CallerRetTys; + SmallVector<MVT, 4> CalleeRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); + bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && + std::equal(CallerRetTys.begin(), CallerRetTys.end(), + CalleeRetTys.begin()); + if (!TypesMatch) { + // musttail in this case would be an LLVM IR validation failure + assert(!MustTail); + CLI.IsTailCall = false; + } + } else { + CLI.IsTailCall = false; + if (MustTail) { + if (CLI.IsVarArg) { + // The return would pop the argument buffer + fail(DL, DAG, "WebAssembly does not support varargs tail calls"); + } else { + 
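+          // musttail was requested, but the 'tail-call' target feature is
+          // not enabled, so all we can do is diagnose it as an error.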
fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); + } + } + } + } + + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + if (Ins.size() > 1) + fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); + + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + + // The generic code may have added an sret argument. If we're lowering an + // invoke function, the ABI requires that the function pointer be the first + // argument, so we may have to swap the arguments. + if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 && + Outs[0].Flags.isSRet()) { + std::swap(Outs[0], Outs[1]); + std::swap(OutVals[0], OutVals[1]); + } + + unsigned NumFixedArgs = 0; + for (unsigned I = 0; I < Outs.size(); ++I) { + const ISD::OutputArg &Out = Outs[I]; + SDValue &OutVal = OutVals[I]; + if (Out.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { + auto &MFI = MF.getFrameInfo(); + int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), + Out.Flags.getByValAlign(), + /*isSS=*/false); + SDValue SizeNode = + DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); + SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); + Chain = DAG.getMemcpy( + Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), + /*isVolatile*/ false, /*AlwaysInline=*/false, + /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); + OutVal = FINode; + } + // Count the number of fixed args *after* legalization. + NumFixedArgs += Out.IsFixed; + } + + bool IsVarArg = CLI.IsVarArg; + auto PtrVT = getPointerTy(Layout); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + if (IsVarArg) { + // Outgoing non-fixed arguments are placed in a buffer. First + // compute their offsets and the total amount of buffer space needed. + for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { + const ISD::OutputArg &Out = Outs[I]; + SDValue &Arg = OutVals[I]; + EVT VT = Arg.getValueType(); + assert(VT != MVT::iPTR && "Legalized args should be concrete"); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Align = std::max(Out.Flags.getOrigAlign(), + Layout.getABITypeAlignment(Ty)); + unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), + Align); + CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), + Offset, VT.getSimpleVT(), + CCValAssign::Full)); + } + } + + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); + + SDValue FINode; + if (IsVarArg && NumBytes) { + // For non-fixed arguments, next emit stores to store the argument values + // to the stack buffer at the offsets computed above. 
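+    // All varargs share a single stack object; each value is stored at the
+    // offset that CCInfo assigned to it in the loop above.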
+    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
+                                                 Layout.getStackAlignment(),
+                                                 /*isSS=*/false);
+    unsigned ValNo = 0;
+    SmallVector<SDValue, 8> Chains;
+    for (SDValue Arg :
+         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+      assert(ArgLocs[ValNo].getValNo() == ValNo &&
+             "ArgLocs should remain in order and only hold varargs args");
+      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
+      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
+      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
+                                DAG.getConstant(Offset, DL, PtrVT));
+      Chains.push_back(
+          DAG.getStore(Chain, DL, Arg, Add,
+                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
+    }
+    if (!Chains.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+  } else if (IsVarArg) {
+    FINode = DAG.getIntPtrConstant(0, DL);
+  }
+
+  if (Callee->getOpcode() == ISD::GlobalAddress) {
+    // If the callee is a GlobalAddress node (quite common, since every direct
+    // call is), turn it into a TargetGlobalAddress node so that
+    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
+    // calls.
+    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
+    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+                                        getPointerTy(DAG.getDataLayout()),
+                                        GA->getOffset());
+    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
+                         getPointerTy(DAG.getDataLayout()), Callee);
+  }
+
+  // Compute the operands for the CALLn node.
+  SmallVector<SDValue, 16> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
+  // isn't reliable.
+  Ops.append(OutVals.begin(),
+             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
+  // Add a pointer to the vararg buffer.
+  if (IsVarArg)
+    Ops.push_back(FINode);
+
+  SmallVector<EVT, 8> InTys;
+  for (const auto &In : Ins) {
+    assert(!In.Flags.isByVal() && "byval is not valid for return values");
+    assert(!In.Flags.isNest() && "nest is not valid for return values");
+    if (In.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
+    if (In.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
+    if (In.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG,
+           "WebAssembly hasn't implemented cons regs last return values");
+    // Ignore In.getOrigAlign() because all our arguments are passed in
+    // registers.
+    InTys.push_back(In.VT);
+  }
+
+  if (CLI.IsTailCall) {
+    // ret_calls do not return values to the current frame
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
+  }
+
+  InTys.push_back(MVT::Other);
+  SDVTList InTyList = DAG.getVTList(InTys);
+  SDValue Res =
+      DAG.getNode(Ins.empty() ?
WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, + DL, InTyList, Ops); + if (Ins.empty()) { + Chain = Res; + } else { + InVals.push_back(Res); + Chain = Res.getValue(1); + } + + return Chain; +} + +bool WebAssemblyTargetLowering::CanLowerReturn( + CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext & /*Context*/) const { + // WebAssembly can only handle returning tuples with multivalue enabled + return Subtarget->hasMultivalue() || Outs.size() <= 1; +} + +SDValue WebAssemblyTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const { + assert((Subtarget->hasMultivalue() || Outs.size() <= 1) && + "MVP WebAssembly can only return up to one value"); + if (!callingConvSupported(CallConv)) + fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); + + SmallVector<SDValue, 4> RetOps(1, Chain); + RetOps.append(OutVals.begin(), OutVals.end()); + Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); + + // Record the number and types of the return values. + for (const ISD::OutputArg &Out : Outs) { + assert(!Out.Flags.isByVal() && "byval is not valid for return values"); + assert(!Out.Flags.isNest() && "nest is not valid for return values"); + assert(Out.IsFixed && "non-fixed return value is not valid"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); + } + + return Chain; +} + +SDValue WebAssemblyTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + if (!callingConvSupported(CallConv)) + fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); + + MachineFunction &MF = DAG.getMachineFunction(); + auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + + // Set up the incoming ARGUMENTS value, which serves to represent the liveness + // of the incoming values before they're represented by virtual registers. + MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); + + for (const ISD::InputArg &In : Ins) { + if (In.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (In.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (In.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (In.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + // Ignore In.getOrigAlign() because all our arguments are passed in + // registers. + InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, + DAG.getTargetConstant(InVals.size(), + DL, MVT::i32)) + : DAG.getUNDEF(In.VT)); + + // Record the number and types of arguments. + MFI->addParam(In.VT); + } + + // Varargs are copied into a buffer allocated by the caller, and a pointer to + // the buffer is passed as an argument. 
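+  // That pointer arrives as one extra trailing ARGUMENT value, which we copy
+  // into a virtual register here so that LowerVASTART can find it later.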
+ if (IsVarArg) { + MVT PtrVT = getPointerTy(MF.getDataLayout()); + Register VarargVreg = + MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); + MFI->setVarargBufferVreg(VarargVreg); + Chain = DAG.getCopyToReg( + Chain, DL, VarargVreg, + DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, + DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); + MFI->addParam(PtrVT); + } + + // Record the number and types of arguments and results. + SmallVector<MVT, 4> Params; + SmallVector<MVT, 4> Results; + computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(), + DAG.getTarget(), Params, Results); + for (MVT VT : Results) + MFI->addResult(VT); + // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify + // the param logic here with ComputeSignatureVTs + assert(MFI->getParams().size() == Params.size() && + std::equal(MFI->getParams().begin(), MFI->getParams().end(), + Params.begin())); + + return Chain; +} + +void WebAssemblyTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { + switch (N->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + // Do not add any results, signifying that N should not be custom lowered + // after all. This happens because simd128 turns on custom lowering for + // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an + // illegal type. + break; + default: + llvm_unreachable( + "ReplaceNodeResults not implemented for this op for WebAssembly!"); + } +} + +//===----------------------------------------------------------------------===// +// Custom lowering hooks. +//===----------------------------------------------------------------------===// + +SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + switch (Op.getOpcode()) { + default: + llvm_unreachable("unimplemented operation lowering"); + return SDValue(); + case ISD::FrameIndex: + return LowerFrameIndex(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::BR_JT: + return LowerBR_JT(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::BlockAddress: + case ISD::BRIND: + fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); + return SDValue(); + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::CopyToReg: + return LowerCopyToReg(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + case ISD::INSERT_VECTOR_ELT: + return LowerAccessVectorElement(Op, DAG); + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + return LowerIntrinsic(Op, DAG); + case ISD::SIGN_EXTEND_INREG: + return LowerSIGN_EXTEND_INREG(Op, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return LowerShift(Op, DAG); + } +} + +SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(2); + if (isa<FrameIndexSDNode>(Src.getNode())) { + // CopyToReg nodes don't support FrameIndex operands. Other targets select + // the FI to some LEA-like instruction, but since we don't have that, we + // need to insert some kind of instruction that can take an FI operand and + // produces a value usable by CopyToReg (i.e. in a vreg). 
So insert a dummy + // local.copy between Op and its FI operand. + SDValue Chain = Op.getOperand(0); + SDLoc DL(Op); + unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); + EVT VT = Src.getValueType(); + SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 + : WebAssembly::COPY_I64, + DL, VT, Src), + 0); + return Op.getNode()->getNumValues() == 1 + ? DAG.getCopyToReg(Chain, DL, Reg, Copy) + : DAG.getCopyToReg(Chain, DL, Reg, Copy, + Op.getNumOperands() == 4 ? Op.getOperand(3) + : SDValue()); + } + return SDValue(); +} + +SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, + SelectionDAG &DAG) const { + int FI = cast<FrameIndexSDNode>(Op)->getIndex(); + return DAG.getTargetFrameIndex(FI, Op.getValueType()); +} + +SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + + if (!Subtarget->getTargetTriple().isOSEmscripten()) { + fail(DL, DAG, + "Non-Emscripten WebAssembly hasn't implemented " + "__builtin_return_address"); + return SDValue(); + } + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), + {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) + .first; +} + +SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + // Non-zero depths are not supported by WebAssembly currently. Use the + // legalizer's default expansion, which is to return 0 (what this function is + // documented to do). + if (Op.getConstantOperandVal(0) > 0) + return SDValue(); + + DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + Register FP = + Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); + return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); +} + +SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const auto *GA = cast<GlobalAddressSDNode>(Op); + EVT VT = Op.getValueType(); + assert(GA->getTargetFlags() == 0 && + "Unexpected target flags on generic GlobalAddressSDNode"); + if (GA->getAddressSpace() != 0) + fail(DL, DAG, "WebAssembly only expects the 0 address space"); + + unsigned OperandFlags = 0; + if (isPositionIndependent()) { + const GlobalValue *GV = GA->getGlobal(); + if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) { + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = getPointerTy(MF.getDataLayout()); + const char *BaseName; + if (GV->getValueType()->isFunctionTy()) { + BaseName = MF.createExternalSymbolName("__table_base"); + OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL; + } + else { + BaseName = MF.createExternalSymbolName("__memory_base"); + OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL; + } + SDValue BaseAddr = + DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, + DAG.getTargetExternalSymbol(BaseName, PtrVT)); + + SDValue SymAddr = DAG.getNode( + WebAssemblyISD::WrapperPIC, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), + OperandFlags)); + + return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr); + } else { + OperandFlags = WebAssemblyII::MO_GOT; + } + } + + return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, + GA->getOffset(), OperandFlags)); +} + +SDValue 
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const auto *ES = cast<ExternalSymbolSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(ES->getTargetFlags() == 0 &&
+         "Unexpected target flags on generic ExternalSymbolSDNode");
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+}
+
+SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // There's no need for a Wrapper node because we always incorporate a jump
+  // table operand into a BR_TABLE instruction, rather than ever
+  // materializing it in a register.
+  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
+                                JT->getTargetFlags());
+}
+
+SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+  SDValue Index = Op.getOperand(2);
+  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Index);
+
+  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
+  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
+
+  // Add an operand for each case.
+  for (auto MBB : MBBs)
+    Ops.push_back(DAG.getBasicBlock(MBB));
+
+  // TODO: For now, we just pick something arbitrary for a default case. We
+  // really want to sniff out the guard and put in the real default case (and
+  // delete the guard).
+  Ops.push_back(DAG.getBasicBlock(MBBs[0]));
+
+  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
+}
+
+SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
+
+  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
+                                    MFI->getVarargBufferVreg(), PtrVT);
+  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
+                      MachinePointerInfo(SV), 0);
+}
+
+SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned IntNo;
+  switch (Op.getOpcode()) {
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    break;
+  default:
+    llvm_unreachable("Invalid intrinsic");
+  }
+  SDLoc DL(Op);
+
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
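+  // The exception-handling intrinsics below are custom lowered to materialize
+  // the symbols they refer to.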
+ + case Intrinsic::wasm_lsda: { + EVT VT = Op.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + auto &Context = MF.getMMI().getContext(); + MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + + Twine(MF.getFunctionNumber())); + return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, + DAG.getMCSymbol(S, PtrVT)); + } + + case Intrinsic::wasm_throw: { + // We only support C++ exceptions for now + int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); + if (Tag != CPP_EXCEPTION) + llvm_unreachable("Invalid tag!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + const char *SymName = MF.createExternalSymbolName("__cpp_exception"); + SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, + DAG.getTargetExternalSymbol(SymName, PtrVT)); + return DAG.getNode(WebAssemblyISD::THROW, DL, + MVT::Other, // outchain type + { + Op.getOperand(0), // inchain + SymNode, // exception symbol + Op.getOperand(3) // thrown value + }); + } + } +} + +SDValue +WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + // If sign extension operations are disabled, allow sext_inreg only if operand + // is a vector extract. SIMD does not depend on sign extension operations, but + // allowing sext_inreg in this context lets us have simple patterns to select + // extract_lane_s instructions. Expanding sext_inreg everywhere would be + // simpler in this file, but would necessitate large and brittle patterns to + // undo the expansion and select extract_lane_s instructions. + assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); + if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + const SDValue &Extract = Op.getOperand(0); + MVT VecT = Extract.getOperand(0).getSimpleValueType(); + MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode()) + ->getVT() + .getSimpleVT(); + MVT ExtractedVecT = + MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); + if (ExtractedVecT == VecT) + return Op; + // Bitcast vector to appropriate type to ensure ISel pattern coverage + const SDValue &Index = Extract.getOperand(1); + unsigned IndexVal = + static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue(); + unsigned Scale = + ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); + assert(Scale > 1); + SDValue NewIndex = + DAG.getConstant(IndexVal * Scale, DL, Index.getValueType()); + SDValue NewExtract = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), + DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), + NewExtract, Op.getOperand(1)); + } + // Otherwise expand + return SDValue(); +} + +SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const EVT VecT = Op.getValueType(); + const EVT LaneT = Op.getOperand(0).getValueType(); + const size_t Lanes = Op.getNumOperands(); + bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8; + + // BUILD_VECTORs are lowered to the instruction that initializes the highest + // possible number of lanes at once followed by a sequence of replace_lane + // instructions to individually initialize any remaining lanes. + + // TODO: Tune this. 
For example, lanewise swizzling is very expensive, so + // swizzled lanes should be given greater weight. + + // TODO: Investigate building vectors by shuffling together vectors built by + // separately specialized means. + + auto IsConstant = [](const SDValue &V) { + return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; + }; + + // Returns the source vector and index vector pair if they exist. Checks for: + // (extract_vector_elt + // $src, + // (sign_extend_inreg (extract_vector_elt $indices, $i)) + // ) + auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { + auto Bail = std::make_pair(SDValue(), SDValue()); + if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleSrc = Lane->getOperand(0); + const SDValue &IndexExt = Lane->getOperand(1); + if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) + return Bail; + const SDValue &Index = IndexExt->getOperand(0); + if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleIndices = Index->getOperand(0); + if (SwizzleSrc.getValueType() != MVT::v16i8 || + SwizzleIndices.getValueType() != MVT::v16i8 || + Index->getOperand(1)->getOpcode() != ISD::Constant || + Index->getConstantOperandVal(1) != I) + return Bail; + return std::make_pair(SwizzleSrc, SwizzleIndices); + }; + + using ValueEntry = std::pair<SDValue, size_t>; + SmallVector<ValueEntry, 16> SplatValueCounts; + + using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>; + SmallVector<SwizzleEntry, 16> SwizzleCounts; + + auto AddCount = [](auto &Counts, const auto &Val) { + auto CountIt = std::find_if(Counts.begin(), Counts.end(), + [&Val](auto E) { return E.first == Val; }); + if (CountIt == Counts.end()) { + Counts.emplace_back(Val, 1); + } else { + CountIt->second++; + } + }; + + auto GetMostCommon = [](auto &Counts) { + auto CommonIt = + std::max_element(Counts.begin(), Counts.end(), + [](auto A, auto B) { return A.second < B.second; }); + assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); + return *CommonIt; + }; + + size_t NumConstantLanes = 0; + + // Count eligible lanes for each type of vector creation op + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op->getOperand(I); + if (Lane.isUndef()) + continue; + + AddCount(SplatValueCounts, Lane); + + if (IsConstant(Lane)) { + NumConstantLanes++; + } else if (CanSwizzle) { + auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); + if (SwizzleSrcs.first) + AddCount(SwizzleCounts, SwizzleSrcs); + } + } + + SDValue SplatValue; + size_t NumSplatLanes; + std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); + + SDValue SwizzleSrc; + SDValue SwizzleIndices; + size_t NumSwizzleLanes = 0; + if (SwizzleCounts.size()) + std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), + NumSwizzleLanes) = GetMostCommon(SwizzleCounts); + + // Predicate returning true if the lane is properly initialized by the + // original instruction + std::function<bool(size_t, const SDValue &)> IsLaneConstructed; + SDValue Result; + if (Subtarget->hasUnimplementedSIMD128()) { + // Prefer swizzles over vector consts over splats + if (NumSwizzleLanes >= NumSplatLanes && + NumSwizzleLanes >= NumConstantLanes) { + Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, + SwizzleIndices); + auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); + IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { + return Swizzled == GetSwizzleSrcs(I, Lane); + }; + } else if (NumConstantLanes >= NumSplatLanes) { + 
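+      // Build a fully-constant vector, substituting zero (or 0.0) for each
+      // non-constant lane; those lanes are overwritten by replace_lane below.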
SmallVector<SDValue, 16> ConstLanes; + for (const SDValue &Lane : Op->op_values()) { + if (IsConstant(Lane)) { + ConstLanes.push_back(Lane); + } else if (LaneT.isFloatingPoint()) { + ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); + } else { + ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); + } + } + Result = DAG.getBuildVector(VecT, DL, ConstLanes); + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; + } + } + if (!Result) { + // Use a splat, but possibly a load_splat + LoadSDNode *SplattedLoad; + if (Subtarget->hasUnimplementedSIMD128() && + (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && + SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { + Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue); + } else { + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); + } + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return Lane == SplatValue; + }; + } + + // Add replace_lane instructions for any unhandled values + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op->getOperand(I); + if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, + DAG.getConstant(I, DL, MVT::i32)); + } + + return Result; +} + +SDValue +WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); + MVT VecType = Op.getOperand(0).getSimpleValueType(); + assert(VecType.is128BitVector() && "Unexpected shuffle vector type"); + size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8; + + // Space for two vector args and sixteen mask indices + SDValue Ops[18]; + size_t OpIdx = 0; + Ops[OpIdx++] = Op.getOperand(0); + Ops[OpIdx++] = Op.getOperand(1); + + // Expand mask indices to byte indices and materialize them as operands + for (int M : Mask) { + for (size_t J = 0; J < LaneBytes; ++J) { + // Lower undefs (represented by -1 in mask) to zero + uint64_t ByteIndex = M == -1 ? 
0 : (uint64_t)M * LaneBytes + J; + Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32); + } + } + + return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); +} + +SDValue +WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, + SelectionDAG &DAG) const { + // Allow constant lane indices, expand variable lane indices + SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); + if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) + return Op; + else + // Perform default expansion + return SDValue(); +} + +static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { + EVT LaneT = Op.getSimpleValueType().getVectorElementType(); + // 32-bit and 64-bit unrolled shifts will have proper semantics + if (LaneT.bitsGE(MVT::i32)) + return DAG.UnrollVectorOp(Op.getNode()); + // Otherwise mask the shift value to get proper semantics from 32-bit shift + SDLoc DL(Op); + SDValue ShiftVal = Op.getOperand(1); + uint64_t MaskVal = LaneT.getSizeInBits() - 1; + SDValue MaskedShiftVal = DAG.getNode( + ISD::AND, // mask opcode + DL, ShiftVal.getValueType(), // masked value type + ShiftVal, // original shift value operand + DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand + ); + + return DAG.UnrollVectorOp( + DAG.getNode(Op.getOpcode(), // original shift opcode + DL, Op.getValueType(), // original return type + Op.getOperand(0), // original vector operand, + MaskedShiftVal // new masked shift value operand + ) + .getNode()); +} + +SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + + // Only manually lower vector shifts + assert(Op.getSimpleValueType().isVector()); + + // Unroll non-splat vector shifts + BuildVectorSDNode *ShiftVec; + SDValue SplatVal; + if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) || + !(SplatVal = ShiftVec->getSplatValue())) + return unrollVectorShift(Op, DAG); + + // All splats except i64x2 const splats are handled by patterns + auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal); + if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64) + return Op; + + // i64x2 const splats are custom lowered to avoid unnecessary wraps + unsigned Opcode; + switch (Op.getOpcode()) { + case ISD::SHL: + Opcode = WebAssemblyISD::VEC_SHL; + break; + case ISD::SRA: + Opcode = WebAssemblyISD::VEC_SHR_S; + break; + case ISD::SRL: + Opcode = WebAssemblyISD::VEC_SHR_U; + break; + default: + llvm_unreachable("unexpected opcode"); + } + APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32); + return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), + DAG.getConstant(Shift, DL, MVT::i32)); +} + +//===----------------------------------------------------------------------===// +// WebAssembly Optimization Hooks +//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h new file mode 100644 index 000000000000..a53e24a05542 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -0,0 +1,122 @@ +//- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the interfaces that WebAssembly uses to lower LLVM +/// code into a selection DAG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H + +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +namespace WebAssemblyISD { + +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, +#define HANDLE_NODETYPE(NODE) NODE, +#include "WebAssemblyISD.def" +#undef HANDLE_NODETYPE +}; + +} // end namespace WebAssemblyISD + +class WebAssemblySubtarget; +class WebAssemblyTargetMachine; + +class WebAssemblyTargetLowering final : public TargetLowering { +public: + WebAssemblyTargetLowering(const TargetMachine &TM, + const WebAssemblySubtarget &STI); + +private: + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the + /// right decision when generating code for different targets. + const WebAssemblySubtarget *Subtarget; + + AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I = nullptr) const override; + bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, + bool *Fast) const override; + bool isIntDivCheap(EVT VT, AttributeList Attr) const override; + bool isVectorLoadExtDesirable(SDValue ExtVal) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, + SelectionDAG &DAG) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + + const char *getClearCacheBuiltinName() const override { + report_fatal_error("llvm.clear_cache is not supported on wasm"); + } + + // Custom lowering hooks. 
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIntrinsic(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; +}; + +namespace WebAssembly { +FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); +} // end namespace WebAssembly + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td new file mode 100644 index 000000000000..a9a99d38f9f1 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -0,0 +1,825 @@ +// WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Atomic operand code-gen constructs. 
+///
+//===----------------------------------------------------------------------===//
+
+let UseNamedOperandTable = 1 in
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                    list<dag> pattern_r, string asmstr_r = "",
+                    string asmstr_s = "", bits<32> atomic_op = -1> {
+  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+              !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
+                      bits<32> atomic_op = -1> {
+  defm "" : NRI<oops, iops, pattern, asmstr,
+                !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+           "atomic.notify \t${off}${p2align}", 0x00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp,
+                I64:$timeout),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+           "i32.atomic.wait \t${off}${p2align}", 0x01>;
+defm ATOMIC_WAIT_I64 :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
+                I64:$timeout),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+           "i64.atomic.wait \t${off}${p2align}", 0x02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
+
+let Predicates = [HasAtomics] in {
+// Select notifies with no constant offset.
+def NotifyPatNoOffset :
+  Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)),
+      (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+
+// Select notifies with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<PatFrag operand> :
+  Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), I32:$count)),
+      (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<regPlusImm>;
+def : NotifyPatImmOff<or_is_add>;
+
+// Select notifies with just a constant offset.
+def NotifyPatOffsetOnly :
+  Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
+      (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+
+def NotifyPatGlobalAddrOffOnly :
+  Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+            I32:$count)),
+      (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+      (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
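+// Rough example (constant and operand names are illustrative): a wait whose
+// address computes (add I32:$base, 16) folds the 16 into the offset field, so
+// (int_wasm_atomic_wait_i32 (add I32:$base, 16), I32:$exp, I64:$timeout)
+// selects to (ATOMIC_WAIT_I32 0, 16, $base, $exp, $timeout).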
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+  Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+      (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+      (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+let hasSideEffects = 1 in
+defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
+                               0x03>;
+} // Defs = [ARGUMENTS]
+
+//===----------------------------------------------------------------------===//
+// Atomic loads
+//===----------------------------------------------------------------------===//
+
+multiclass AtomicLoad<WebAssemblyRegClass rc, string name, int atomic_op> {
+  defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
+defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
+
+// Select loads with no constant offset.
+let Predicates = [HasAtomics] in {
+def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>;
+def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+
+// Select loads with a constant offset.
+
+// Pattern with address + immediate offset
+def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>;
+def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>;
+def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>;
+def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
+
+// Select loads with just a constant offset.
+def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
+def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+
+def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
+def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+
+} // Predicates = [HasAtomics]
+
+// Extending loads. Note that there are only zero-extending atomic loads, no
+// sign-extending loads.
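+// For instance, the threads proposal provides i32.atomic.load8_u but no
+// i32.atomic.load8_s, so a sign-extended subword load is a zero-extending
+// load followed by a separate sign-extension instruction.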
+defm ATOMIC_LOAD8_U_I32 : AtomicLoad<I32, "i32.atomic.load8_u", 0x12>; +defm ATOMIC_LOAD16_U_I32 : AtomicLoad<I32, "i32.atomic.load16_u", 0x13>; +defm ATOMIC_LOAD8_U_I64 : AtomicLoad<I64, "i64.atomic.load8_u", 0x14>; +defm ATOMIC_LOAD16_U_I64 : AtomicLoad<I64, "i64.atomic.load16_u", 0x15>; +defm ATOMIC_LOAD32_U_I64 : AtomicLoad<I64, "i64.atomic.load32_u", 0x16>; + +// Fragments for extending loads. These are different from regular loads because +// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and +// therefore don't have the extension type field. So instead of matching that, +// we match the patterns that the type legalizer expands them to. + +// We directly match zext patterns and select the zext atomic loads. +// i32 (zext (i8 (atomic_load_8))) gets legalized to +// i32 (and (i32 (atomic_load_8)), 255) +// These can be selected to a single zero-extending atomic load instruction. +def zext_aload_8_32 : + PatFrag<(ops node:$addr), (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16_32 : + PatFrag<(ops node:$addr), (and (i32 (atomic_load_16 node:$addr)), 65535)>; +// Unlike regular loads, extension to i64 is handled differently than i32. +// i64 (zext (i8 (atomic_load_8))) gets legalized to +// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) +def zext_aload_8_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>; +def zext_aload_16_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>; +def zext_aload_32_64 : + PatFrag<(ops node:$addr), + (zext (i32 (atomic_load node:$addr)))>; + +// We don't have single sext atomic load instructions. So for sext loads, we +// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit +// results) and select a zext load; the next instruction will be sext_inreg +// which is selected by itself. +def sext_aload_8_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>; +def sext_aload_16_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; + +let Predicates = [HasAtomics] in { +// Select zero-extending loads with no constant offset. 
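+// E.g. (and (i32 (atomic_load_8 $addr)), 255), the legalized form of an i8
+// zero-extending atomic load, matches zext_aload_8_32 above and selects to a
+// single i32.atomic.load8_u.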
+def : LoadPatNoOffset<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
+def : LoadPatNoOffset<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
+def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
+
+// Select sign-extending loads with no constant offset
+def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
+def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
+def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
+
+// Zero-extending loads with constant offset
+def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
+def : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, ATOMIC_LOAD16_U_I32>;
+def : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, ATOMIC_LOAD8_U_I32>;
+def : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, ATOMIC_LOAD16_U_I32>;
+def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
+def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
+def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>;
+def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
+def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
+def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>;
+
+// Sign-extending loads with constant offset
+def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>;
+def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>;
+def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>;
+def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>;
+def : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
+def : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
+def : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
+def : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
+// No 32->64 patterns, just use i32.atomic.load and i64.extend_i32_s
+
+// Extending loads with just a constant offset
+def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
+def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
+def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
+def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
+def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
+def : LoadPatOffsetOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatOffsetOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+
+def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
+def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
+def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
+def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
+def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
+def : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
+def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+
+} // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic stores
+//===----------------------------------------------------------------------===//
+
+multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
+  defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
+defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
+
+// We need an 'atomic' version of store patterns because store and atomic_store
+// nodes have different operand orders:
+// store: (store $val, $ptr)
+// atomic_store: (atomic_store $ptr, $val)
+
+let Predicates = [HasAtomics] in {
+
+// Select stores with no constant offset.
+class AStorePatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(kind I32:$addr, ty:$val), (inst 0, 0, I32:$addr, ty:$val)>;
+def : AStorePatNoOffset<i32, atomic_store_32, ATOMIC_STORE_I32>;
+def : AStorePatNoOffset<i64, atomic_store_64, ATOMIC_STORE_I64>;
+
+// Select stores with a constant offset.
+
+// Pattern with address + immediate offset
+class AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+  Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
+      (inst 0, imm:$off, I32:$addr, ty:$val)>;
+def : AStorePatImmOff<i32, atomic_store_32, regPlusImm, ATOMIC_STORE_I32>;
+def : AStorePatImmOff<i64, atomic_store_64, regPlusImm, ATOMIC_STORE_I64>;
+def : AStorePatImmOff<i32, atomic_store_32, or_is_add, ATOMIC_STORE_I32>;
+def : AStorePatImmOff<i64, atomic_store_64, or_is_add, ATOMIC_STORE_I64>;
+
+// Select stores with just a constant offset.
+class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
+def : AStorePatOffsetOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
+def : AStorePatOffsetOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
+
+class AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+def : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
+def : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
+
+} // Predicates = [HasAtomics]
+
+// Truncating stores.
+defm ATOMIC_STORE8_I32 : AtomicStore<I32, "i32.atomic.store8", 0x19>;
+defm ATOMIC_STORE16_I32 : AtomicStore<I32, "i32.atomic.store16", 0x1a>;
+defm ATOMIC_STORE8_I64 : AtomicStore<I64, "i64.atomic.store8", 0x1b>;
+defm ATOMIC_STORE16_I64 : AtomicStore<I64, "i64.atomic.store16", 0x1c>;
+defm ATOMIC_STORE32_I64 : AtomicStore<I64, "i64.atomic.store32", 0x1d>;
+
+// Fragments for truncating stores.
+
+// We don't have single truncating atomic store instructions. For 32-bit
+// instructions, we just need to match bare atomic stores. On the other hand,
+// truncating stores from i64 values are first truncated to i32.
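+// A concrete sketch: an 8-bit atomic store of an i64 value reaches selection
+// as (atomic_store_8 $addr, (i32 (trunc (i64 $val)))), which trunc_astore_8_64
+// below matches and selects to i64.atomic.store8.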
+class trunc_astore_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (kind node:$addr, (i32 (trunc (i64 node:$val))))>; +def trunc_astore_8_64 : trunc_astore_64<atomic_store_8>; +def trunc_astore_16_64 : trunc_astore_64<atomic_store_16>; +def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>; + +let Predicates = [HasAtomics] in { + +// Truncating stores with no constant offset +def : AStorePatNoOffset<i32, atomic_store_8, ATOMIC_STORE8_I32>; +def : AStorePatNoOffset<i32, atomic_store_16, ATOMIC_STORE16_I32>; +def : AStorePatNoOffset<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>; +def : AStorePatNoOffset<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>; +def : AStorePatNoOffset<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>; + +// Truncating stores with a constant offset +def : AStorePatImmOff<i32, atomic_store_8, regPlusImm, ATOMIC_STORE8_I32>; +def : AStorePatImmOff<i32, atomic_store_16, regPlusImm, ATOMIC_STORE16_I32>; +def : AStorePatImmOff<i64, trunc_astore_8_64, regPlusImm, ATOMIC_STORE8_I64>; +def : AStorePatImmOff<i64, trunc_astore_16_64, regPlusImm, ATOMIC_STORE16_I64>; +def : AStorePatImmOff<i64, trunc_astore_32_64, regPlusImm, ATOMIC_STORE32_I64>; +def : AStorePatImmOff<i32, atomic_store_8, or_is_add, ATOMIC_STORE8_I32>; +def : AStorePatImmOff<i32, atomic_store_16, or_is_add, ATOMIC_STORE16_I32>; +def : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, ATOMIC_STORE8_I64>; +def : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add, ATOMIC_STORE16_I64>; +def : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add, ATOMIC_STORE32_I64>; + +// Truncating stores with just a constant offset +def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>; +def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>; +def : AStorePatOffsetOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>; +def : AStorePatOffsetOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>; +def : AStorePatOffsetOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>; + +def : AStorePatGlobalAddrOffOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>; +def : AStorePatGlobalAddrOffOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>; +def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>; +def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>; +def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>; + +} // Predicates = [HasAtomics] + +//===----------------------------------------------------------------------===// +// Atomic binary read-modify-writes +//===----------------------------------------------------------------------===// + +multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name, + int atomic_op> { + defm "" : + ATOMIC_I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"), + !strconcat(name, "\t${off}${p2align}"), atomic_op>; +} + +defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>; +defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0x1f>; +defm ATOMIC_RMW8_U_ADD_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0x20>; +defm ATOMIC_RMW16_U_ADD_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0x21>; +defm ATOMIC_RMW8_U_ADD_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0x22>; +defm ATOMIC_RMW16_U_ADD_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0x23>; +defm ATOMIC_RMW32_U_ADD_I64 : + WebAssemblyBinRMW<I64, 
"i64.atomic.rmw32.add_u", 0x24>; + +defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0x25>; +defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0x26>; +defm ATOMIC_RMW8_U_SUB_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0x27>; +defm ATOMIC_RMW16_U_SUB_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0x28>; +defm ATOMIC_RMW8_U_SUB_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0x29>; +defm ATOMIC_RMW16_U_SUB_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0x2a>; +defm ATOMIC_RMW32_U_SUB_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0x2b>; + +defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0x2c>; +defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0x2d>; +defm ATOMIC_RMW8_U_AND_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0x2e>; +defm ATOMIC_RMW16_U_AND_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0x2f>; +defm ATOMIC_RMW8_U_AND_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0x30>; +defm ATOMIC_RMW16_U_AND_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0x31>; +defm ATOMIC_RMW32_U_AND_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0x32>; + +defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0x33>; +defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0x34>; +defm ATOMIC_RMW8_U_OR_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0x35>; +defm ATOMIC_RMW16_U_OR_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0x36>; +defm ATOMIC_RMW8_U_OR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0x37>; +defm ATOMIC_RMW16_U_OR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0x38>; +defm ATOMIC_RMW32_U_OR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0x39>; + +defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0x3a>; +defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0x3b>; +defm ATOMIC_RMW8_U_XOR_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0x3c>; +defm ATOMIC_RMW16_U_XOR_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0x3d>; +defm ATOMIC_RMW8_U_XOR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0x3e>; +defm ATOMIC_RMW16_U_XOR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0x3f>; +defm ATOMIC_RMW32_U_XOR_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0x40>; + +defm ATOMIC_RMW_XCHG_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0x41>; +defm ATOMIC_RMW_XCHG_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0x42>; +defm ATOMIC_RMW8_U_XCHG_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0x43>; +defm ATOMIC_RMW16_U_XCHG_I32 : + WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0x44>; +defm ATOMIC_RMW8_U_XCHG_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0x45>; +defm ATOMIC_RMW16_U_XCHG_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0x46>; +defm ATOMIC_RMW32_U_XCHG_I64 : + WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0x47>; + +// Select binary RMWs with no constant offset. +class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind I32:$addr, ty:$val)), (inst 0, 0, I32:$addr, ty:$val)>; + +// Select binary RMWs with a constant offset. 
+ +// Pattern with address + immediate offset +class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : + Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)), + (inst 0, imm:$off, I32:$addr, ty:$val)>; + +// Select binary RMWs with just a constant offset. +class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind imm:$off, ty:$val)), + (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; + +class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)), + (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>; + +// Patterns for various addressing modes. +multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32, + NI inst_64> { + def : BinRMWPatNoOffset<i32, rmw_32, inst_32>; + def : BinRMWPatNoOffset<i64, rmw_64, inst_64>; + + def : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>; + def : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>; + def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>; + def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>; + + def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>; + def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>; + + def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>; + def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>; +} + +let Predicates = [HasAtomics] in { +defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64, ATOMIC_RMW_ADD_I32, + ATOMIC_RMW_ADD_I64>; +defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64, ATOMIC_RMW_SUB_I32, + ATOMIC_RMW_SUB_I64>; +defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64, ATOMIC_RMW_AND_I32, + ATOMIC_RMW_AND_I64>; +defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64, ATOMIC_RMW_OR_I32, + ATOMIC_RMW_OR_I64>; +defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64, ATOMIC_RMW_XOR_I32, + ATOMIC_RMW_XOR_I64>; +defm : BinRMWPattern<atomic_swap_32, atomic_swap_64, ATOMIC_RMW_XCHG_I32, + ATOMIC_RMW_XCHG_I64>; +} // Predicates = [HasAtomics] + +// Truncating & zero-extending binary RMW patterns. +// These are combined patterns of truncating store patterns and zero-extending +// load patterns above. +class zext_bin_rmw_8_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (and (i32 (kind node:$addr, node:$val)), 255)>; +class zext_bin_rmw_16_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (and (i32 (kind node:$addr, node:$val)), 65535)>; +class zext_bin_rmw_8_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (and (i64 (anyext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$val))))))), 255)>; +class zext_bin_rmw_16_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (and (i64 (anyext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$val))))))), 65535)>; +class zext_bin_rmw_32_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (zext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>; + +// Truncating & sign-extending binary RMW patterns. +// These are combined patterns of truncating store patterns and sign-extending +// load patterns above. We match subword RMWs (for 32-bit) and anyext RMWs (for +// 64-bit) and select a zext RMW; the next instruction will be sext_inreg which +// is selected by itself. 
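+// E.g. a sign-extended i8 add RMW used as i32 reaches selection as
+// (sext_inreg (i32 (atomic_load_add_8 ...)), i8); the bare RMW selects
+// i32.atomic.rmw8.add_u here, and the sext_inreg is selected separately
+// afterwards.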
+class sext_bin_rmw_8_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), (kind node:$addr, node:$val)>; +class sext_bin_rmw_16_32<PatFrag kind> : sext_bin_rmw_8_32<kind>; +class sext_bin_rmw_8_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$val), + (anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>; +class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>; +// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s + +// Patterns for various addressing modes for truncating-extending binary RMWs. +multiclass BinRMWTruncExtPattern< + PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64, + NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> { + // Truncating-extending binary RMWs with no constant offset + def : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>; + def : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>; + + def : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>; + + // Truncating-extending binary RMWs with a constant offset + def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>; + def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm, inst32_64>; + def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>; + def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>; + def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>; + + def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>; + def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>; + def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>; + def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>; + def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>; + def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>; + def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>; + def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>; + + // Truncating-extending binary RMWs with just a constant offset + def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>; + def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>; + + def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : 
BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>; + + def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>; + def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>; + + def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>; + def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>; + def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>; + def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>; +} + +let Predicates = [HasAtomics] in { +defm : BinRMWTruncExtPattern< + atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64, + ATOMIC_RMW8_U_ADD_I32, ATOMIC_RMW16_U_ADD_I32, + ATOMIC_RMW8_U_ADD_I64, ATOMIC_RMW16_U_ADD_I64, ATOMIC_RMW32_U_ADD_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64, + ATOMIC_RMW8_U_SUB_I32, ATOMIC_RMW16_U_SUB_I32, + ATOMIC_RMW8_U_SUB_I64, ATOMIC_RMW16_U_SUB_I64, ATOMIC_RMW32_U_SUB_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64, + ATOMIC_RMW8_U_AND_I32, ATOMIC_RMW16_U_AND_I32, + ATOMIC_RMW8_U_AND_I64, ATOMIC_RMW16_U_AND_I64, ATOMIC_RMW32_U_AND_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64, + ATOMIC_RMW8_U_OR_I32, ATOMIC_RMW16_U_OR_I32, + ATOMIC_RMW8_U_OR_I64, ATOMIC_RMW16_U_OR_I64, ATOMIC_RMW32_U_OR_I64>; +defm : BinRMWTruncExtPattern< + atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64, + ATOMIC_RMW8_U_XOR_I32, ATOMIC_RMW16_U_XOR_I32, + ATOMIC_RMW8_U_XOR_I64, ATOMIC_RMW16_U_XOR_I64, ATOMIC_RMW32_U_XOR_I64>; +defm : BinRMWTruncExtPattern< + atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64, + ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32, + ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>; +} // Predicates = [HasAtomics] + +//===----------------------------------------------------------------------===// +// Atomic ternary read-modify-writes +//===----------------------------------------------------------------------===// + +// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value, success +// flag}. When we use the success flag or both values, we can't make use of i64 +// truncate/extend versions of instructions for now, which is suboptimal. +// Consider adding a pass after instruction selection that optimizes this case +// if it is frequent. 
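+// For reference, the IR shape in question is roughly:
+//   %pair = cmpxchg i8* %p, i8 %exp, i8 %new seq_cst seq_cst ; { i8, i1 }
+//   %ok   = extractvalue { i8, i1 } %pair, 1
+// Once the success flag %ok is used, the whole pair must be materialized, so
+// the i64 truncate/extend cmpxchg forms below cannot be applied.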
+ +multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name, + int atomic_op> { + defm "" : + ATOMIC_I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp, + rc:$new_), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"), + !strconcat(name, "\t${off}${p2align}"), atomic_op>; +} + +defm ATOMIC_RMW_CMPXCHG_I32 : + WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0x48>; +defm ATOMIC_RMW_CMPXCHG_I64 : + WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0x49>; +defm ATOMIC_RMW8_U_CMPXCHG_I32 : + WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0x4a>; +defm ATOMIC_RMW16_U_CMPXCHG_I32 : + WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0x4b>; +defm ATOMIC_RMW8_U_CMPXCHG_I64 : + WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0x4c>; +defm ATOMIC_RMW16_U_CMPXCHG_I64 : + WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0x4d>; +defm ATOMIC_RMW32_U_CMPXCHG_I64 : + WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0x4e>; + +// Select ternary RMWs with no constant offset. +class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)), + (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>; + +// Select ternary RMWs with a constant offset. + +// Pattern with address + immediate offset +class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : + Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), + (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>; + +// Select ternary RMWs with just a constant offset. +class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), + (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>; + +class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)), + (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>; + +// Patterns for various addressing modes. +multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32, + NI inst_64> { + def : TerRMWPatNoOffset<i32, rmw_32, inst_32>; + def : TerRMWPatNoOffset<i64, rmw_64, inst_64>; + + def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>; + def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>; + def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>; + def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>; + + def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>; + def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>; + + def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>; + def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>; +} + +let Predicates = [HasAtomics] in +defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64, + ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>; + +// Truncating & zero-extending ternary RMW patterns. +// DAG legalization & optimization before instruction selection may introduce +// additional nodes such as anyext or assertzext depending on operand types. 
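+// For example, a zero-extended i64 result of an 8-bit cmpxchg can reach
+// selection as (zext (i32 (assertzext (i32 (atomic_cmp_swap_8 $addr,
+// (trunc $exp), (trunc $new)))))), which is the shape zext_ter_rmw_8_64
+// below encodes.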
+class zext_ter_rmw_8_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>; +class zext_ter_rmw_16_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>; +class zext_ter_rmw_8_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (zext (i32 (assertzext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))))>; +class zext_ter_rmw_16_64<PatFrag kind> : zext_ter_rmw_8_64<kind>; +class zext_ter_rmw_32_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (zext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))>; + +// Truncating & sign-extending ternary RMW patterns. +// We match subword RMWs (for 32-bit) and anyext RMWs (for 64-bit) and select a +// zext RMW; the next instruction will be sext_inreg which is selected by +// itself. +class sext_ter_rmw_8_32<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (kind node:$addr, node:$exp, node:$new)>; +class sext_ter_rmw_16_32<PatFrag kind> : sext_ter_rmw_8_32<kind>; +class sext_ter_rmw_8_64<PatFrag kind> : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (anyext (i32 (assertzext (i32 + (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))))>; +class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>; +// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s + +// Patterns for various addressing modes for truncating-extending ternary RMWs. +multiclass TerRMWTruncExtPattern< + PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64, + NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> { + // Truncating-extending ternary RMWs with no constant offset + def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>; + def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>; + + def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>; + + // Truncating-extending ternary RMWs with a constant offset + def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>; + def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>; + def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>; + def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>; + def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>; + + def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>; + def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>; + def : TerRMWPatImmOff<i64, 
sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>; + def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>; + def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>; + def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>; + def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>; + def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>; + + // Truncating-extending ternary RMWs with just a constant offset + def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>; + def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>; + + def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>; + + def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>; + def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>; + + def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>; + def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>; + def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>; + def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>; +} + +let Predicates = [HasAtomics] in +defm : TerRMWTruncExtPattern< + atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64, + ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32, + ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64, + ATOMIC_RMW32_U_CMPXCHG_I64>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td new file mode 100644 index 000000000000..05735cf6d31f --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -0,0 +1,71 @@ +// WebAssemblyInstrBulkMemory.td - bulk memory codegen support --*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly bulk memory codegen constructs. 
+/// +//===----------------------------------------------------------------------===// + +// Instruction requiring HasBulkMemory and the bulk memory prefix byte +multiclass BULK_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r = "", + string asmstr_s = "", bits<32> simdop = -1> { + defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, + !or(0xfc00, !and(0xff, simdop))>, + Requires<[HasBulkMemory]>; +} + +// Bespoke types and nodes for bulk memory ops +def wasm_memcpy_t : SDTypeProfile<0, 5, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisInt<4>] +>; +def wasm_memcpy : SDNode<"WebAssemblyISD::MEMORY_COPY", wasm_memcpy_t, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + +def wasm_memset_t : SDTypeProfile<0, 4, + [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>] +>; +def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t, + [SDNPHasChain, SDNPMayStore]>; + +let mayStore = 1, hasSideEffects = 1 in +defm MEMORY_INIT : + BULK_I<(outs), + (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest, + I32:$offset, I32:$size), + (outs), (ins i32imm_op:$seg, i32imm_op:$idx), + [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest, + I32:$offset, I32:$size + )], + "memory.init\t$seg, $idx, $dest, $offset, $size", + "memory.init\t$seg, $idx", 0x08>; + +let hasSideEffects = 1 in +defm DATA_DROP : + BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg), + [(int_wasm_data_drop (i32 timm:$seg))], + "data.drop\t$seg", "data.drop\t$seg", 0x09>; + +let mayLoad = 1, mayStore = 1 in +defm MEMORY_COPY : + BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx, + I32:$dst, I32:$src, I32:$len), + (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx), + [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx), + I32:$dst, I32:$src, I32:$len + )], + "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len", + "memory.copy\t$src_idx, $dst_idx", 0x0a>; + +let mayStore = 1 in +defm MEMORY_FILL : + BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size), + (outs), (ins i32imm_op:$idx), + [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)], + "memory.fill\t$idx, $dst, $value, $size", + "memory.fill\t$idx", 0x0b>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td new file mode 100644 index 000000000000..703c15d58c93 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -0,0 +1,177 @@ +//===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Call operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +// TODO: addr64: These currently assume the callee address is 32-bit. +// FIXME: add $type to first call_indirect asmstr (and maybe $flags) + +// Call sequence markers. These have an immediate which represents the amount of +// stack space to allocate or free, which is used for varargs lowering. 
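+// Sketch (byte count is illustrative): a varargs call needing 16 bytes of
+// stack for its outgoing arguments is bracketed during ISel as
+//   ADJCALLSTACKDOWN 16, 0 ... call ... ADJCALLSTACKUP 16, 0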
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in { +defm ADJCALLSTACKDOWN : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2), + [(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>; +defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2), + [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>; +} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 + +multiclass CALL<ValueType vt, WebAssemblyRegClass rt, string prefix, + list<Predicate> preds = []> { + defm CALL_#vt : + I<(outs rt:$dst), (ins function32_op:$callee, variable_ops), + (outs), (ins function32_op:$callee), + [(set (vt rt:$dst), (WebAssemblycall1 (i32 imm:$callee)))], + !strconcat(prefix, "call\t$dst, $callee"), + !strconcat(prefix, "call\t$callee"), + 0x10>, + Requires<preds>; + + let isCodeGenOnly = 1 in + defm PCALL_INDIRECT_#vt : + I<(outs rt:$dst), (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(set (vt rt:$dst), (WebAssemblycall1 I32:$callee))], + "PSEUDO CALL INDIRECT\t$callee", + "PSEUDO CALL INDIRECT\t$callee">, + Requires<preds>; + + defm CALL_INDIRECT_#vt : + I<(outs rt:$dst), + (ins TypeIndex:$type, i32imm:$flags, variable_ops), + (outs), (ins TypeIndex:$type, i32imm:$flags), + [], + !strconcat(prefix, "call_indirect\t$dst"), + !strconcat(prefix, "call_indirect\t$type"), + 0x11>, + Requires<preds>; +} + +let Uses = [SP32, SP64], isCall = 1 in { +defm "" : CALL<i32, I32, "i32.">; +defm "" : CALL<i64, I64, "i64.">; +defm "" : CALL<f32, F32, "f32.">; +defm "" : CALL<f64, F64, "f64.">; +defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>; +defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>; +defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>; +defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>; +defm "" : CALL<v2i64, V128, "v128.", [HasSIMD128]>; +defm "" : CALL<v4f32, V128, "v128.", [HasSIMD128]>; +defm "" : CALL<v2f64, V128, "v128.", [HasSIMD128]>; + +let IsCanonical = 1 in { +defm CALL_VOID : + I<(outs), (ins function32_op:$callee, variable_ops), + (outs), (ins function32_op:$callee), + [(WebAssemblycall0 (i32 imm:$callee))], + "call \t$callee", "call\t$callee", 0x10>; + +let isReturn = 1 in +defm RET_CALL : + I<(outs), (ins function32_op:$callee, variable_ops), + (outs), (ins function32_op:$callee), + [(WebAssemblyretcall (i32 imm:$callee))], + "return_call \t$callee", "return_call\t$callee", 0x12>, + Requires<[HasTailCall]>; + +let isCodeGenOnly = 1 in +defm PCALL_INDIRECT_VOID : + I<(outs), (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(WebAssemblycall0 I32:$callee)], + "PSEUDO CALL INDIRECT\t$callee", + "PSEUDO CALL INDIRECT\t$callee">; + +defm CALL_INDIRECT_VOID : + I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops), + (outs), (ins TypeIndex:$type, i32imm:$flags), + [], + "call_indirect\t", "call_indirect\t$type", + 0x11>; + +let isReturn = 1 in +defm RET_CALL_INDIRECT : + I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops), + (outs), (ins TypeIndex:$type, i32imm:$flags), + [], + "return_call_indirect\t", "return_call_indirect\t$type", + 0x13>, + Requires<[HasTailCall]>; + +let isCodeGenOnly = 1, isReturn = 1 in +defm PRET_CALL_INDIRECT: + I<(outs), (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(WebAssemblyretcall I32:$callee)], + "PSEUDO RET_CALL INDIRECT\t$callee", + "PSEUDO RET_CALL INDIRECT\t$callee">, + Requires<[HasTailCall]>; + +} // IsCanonical = 1 +} // Uses = [SP32,SP64], isCall = 1 + +// Patterns for matching a direct call to a global address. 
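+// E.g. an i32-returning direct call to @foo (an illustrative symbol) matches
+// (i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:@foo))) and selects
+// to (CALL_i32 @foo); the same shape repeats below for each return type.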
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_i32 tglobaladdr:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_i64 tglobaladdr:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_f32 tglobaladdr:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_f64 tglobaladdr:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_exnref tglobaladdr:$callee)>, + Requires<[HasExceptionHandling]>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), + (CALL_VOID tglobaladdr:$callee)>; +def : Pat<(WebAssemblyretcall (WebAssemblywrapper tglobaladdr:$callee)), + (RET_CALL tglobaladdr:$callee)>, Requires<[HasTailCall]>; + +// Patterns for matching a direct call to an external symbol. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_i32 texternalsym:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_i64 texternalsym:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_f32 texternalsym:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_f64 texternalsym:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_exnref texternalsym:$callee)>, + Requires<[HasExceptionHandling]>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), + (CALL_VOID texternalsym:$callee)>; +def : Pat<(WebAssemblyretcall (WebAssemblywrapper texternalsym:$callee)), + (RET_CALL texternalsym:$callee)>, Requires<[HasTailCall]>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td 
b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td new file mode 100644 index 000000000000..1afc9a8790dc --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -0,0 +1,161 @@ +//===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly control-flow code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { +// The condition operand is a boolean value which WebAssembly represents as i32. +defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond), + (outs), (ins bb_op:$dst), + [(brcond I32:$cond, bb:$dst)], + "br_if \t$dst, $cond", "br_if \t$dst", 0x0d>; +let isCodeGenOnly = 1 in +defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), + (outs), (ins bb_op:$dst), []>; +let isBarrier = 1 in +defm BR : NRI<(outs), (ins bb_op:$dst), + [(br bb:$dst)], + "br \t$dst", 0x0c>; +} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 + +def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), + (BR_IF bb_op:$dst, I32:$cond)>; +def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), + (BR_UNLESS bb_op:$dst, I32:$cond)>; + +// A list of branch targets enclosed in {} and separated by comma. +// Used by br_table only. +def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; } +let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in +def brlist : Operand<i32> { + let ParserMatchClass = BrListAsmOperand; + let PrintMethod = "printBrList"; +} + +// TODO: SelectionDAG's lowering insists on using a pointer as the index for +// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode +// currently. +let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { +defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops), + (outs), (ins brlist:$brl), + [(WebAssemblybr_table I32:$index)], + "br_table \t$index", "br_table \t$brl", + 0x0e>; +defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), + (outs), (ins brlist:$brl), + [(WebAssemblybr_table I64:$index)], + "br_table \t$index", "br_table \t$brl", + 0x0e>; +} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 + +// This is technically a control-flow instruction, since all it affects is the +// IP. +defm NOP : NRI<(outs), (ins), [], "nop", 0x01>; + +// Placemarkers to indicate the start or end of a block or loop scope. +// These use/clobber VALUE_STACK to prevent them from being moved into the +// middle of an expression tree. +let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { +defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>; +defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>; + +defm IF : I<(outs), (ins Signature:$sig, I32:$cond), + (outs), (ins Signature:$sig), + [], "if \t$sig, $cond", "if \t$sig", 0x04>; +defm ELSE : NRI<(outs), (ins), [], "else", 0x05>; + +// END_BLOCK, END_LOOP, END_IF and END_FUNCTION are represented with the same +// opcode in wasm. +defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>; +defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>; +defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>; +// Generic instruction, for disassembler. 
+let IsCanonical = 1 in
+defm END : NRI<(outs), (ins), [], "end", 0x0b>;
+let isTerminator = 1, isBarrier = 1 in
+defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
+} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
+
+
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+
+let isReturn = 1 in {
+
+defm RETURN : I<(outs), (ins variable_ops), (outs), (ins),
+                [(WebAssemblyreturn)],
+                "return", "return", 0x0f>;
+// Equivalent to RETURN, for use at the end of a function when wasm
+// semantics return by falling off the end of the block.
+let isCodeGenOnly = 1 in
+defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;
+
+} // isReturn = 1
+
+defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+//===----------------------------------------------------------------------===//
+// Exception handling instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasExceptionHandling] in {
+
+// Throwing an exception: throw / rethrow
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+defm THROW : I<(outs), (ins event_op:$tag, variable_ops),
+               (outs), (ins event_op:$tag),
+               [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
+               "throw \t$tag", "throw \t$tag", 0x08>;
+defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [],
+                 "rethrow \t$exn", "rethrow", 0x09>;
+// Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch
+// intrinsic. Will be converted to the real rethrow instruction later.
+let isPseudo = 1 in
+defm RETHROW_IN_CATCH : NRI<(outs), (ins), [(int_wasm_rethrow_in_catch)],
+                            "rethrow_in_catch", 0>;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+// Region within which an exception is caught: try / end_try
+let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
+defm TRY : NRI<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
+defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
+} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
+
+// Catching an exception: catch / extract_exception
+let hasCtrlDep = 1, hasSideEffects = 1 in
+defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [],
+               "catch \t$dst", "catch", 0x07>;
+
+// Querying / extracting an exception: br_on_exn
+// br_on_exn queries an exnref to see if it matches the corresponding exception
+// tag index. If true, it branches to the given label and pushes the
+// corresponding argument values of the exception onto the stack.
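+// Illustrative sketch (an assumption for exposition, not part of the original
+// source) of how these instructions compose under the exnref-based EH
+// proposal; `__cpp_exception` is the event symbol this backend uses elsewhere:
+//
+//   try
+//     ...
+//   catch                            ;; CATCH: push the thrown exnref
+//     br_on_exn 0, __cpp_exception   ;; tag matches: branch out, push payload
+//     rethrow                        ;; foreign exception: keep unwinding
+//   end_try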
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in
+defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn),
+                   (outs), (ins bb_op:$dst, event_op:$tag), [],
+                   "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag",
+                   0x0a>;
+// This is a pseudo instruction that simulates popping a value from the stack
+// that has been pushed by br_on_exn.
+let isCodeGenOnly = 1, hasSideEffects = 1 in
+defm EXTRACT_EXCEPTION_I32 : NRI<(outs I32:$dst), (ins),
+                                 [(set I32:$dst, (int_wasm_extract_exception))],
+                                 "extract_exception\t$dst">;
+
+// Pseudo instructions: cleanupret / catchret
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+    isPseudo = 1, isEHScopeReturn = 1 in {
+  defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "cleanupret", 0>;
+  defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
+                      [(catchret bb:$dst, bb:$from)], "catchret", 0>;
+} // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+  // isPseudo = 1, isEHScopeReturn = 1
+} // Predicates = [HasExceptionHandling] diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td new file mode 100644 index 000000000000..f3d9c5d5032c --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -0,0 +1,248 @@
+//===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-=
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly datatype conversions, truncations, reinterpretations,
+/// promotions, and demotions operand code-gen constructs.
+/// +//===----------------------------------------------------------------------===// + +defm I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins), + [(set I32:$dst, (trunc I64:$src))], + "i32.wrap_i64\t$dst, $src", "i32.wrap_i64", 0xa7>; + +defm I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins), + [(set I64:$dst, (sext I32:$src))], + "i64.extend_i32_s\t$dst, $src", "i64.extend_i32_s", + 0xac>; +defm I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins), + [(set I64:$dst, (zext I32:$src))], + "i64.extend_i32_u\t$dst, $src", "i64.extend_i32_u", + 0xad>; + +let Predicates = [HasSignExt] in { +defm I32_EXTEND8_S_I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins), + [(set I32:$dst, (sext_inreg I32:$src, i8))], + "i32.extend8_s\t$dst, $src", "i32.extend8_s", + 0xc0>; +defm I32_EXTEND16_S_I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins), + [(set I32:$dst, (sext_inreg I32:$src, i16))], + "i32.extend16_s\t$dst, $src", "i32.extend16_s", + 0xc1>; +defm I64_EXTEND8_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins), + [(set I64:$dst, (sext_inreg I64:$src, i8))], + "i64.extend8_s\t$dst, $src", "i64.extend8_s", + 0xc2>; +defm I64_EXTEND16_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins), + [(set I64:$dst, (sext_inreg I64:$src, i16))], + "i64.extend16_s\t$dst, $src", "i64.extend16_s", + 0xc3>; +defm I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins), + [(set I64:$dst, (sext_inreg I64:$src, i32))], + "i64.extend32_s\t$dst, $src", "i64.extend32_s", + 0xc4>; +} // Predicates = [HasSignExt] + +// Expand a "don't care" extend into zero-extend (chosen over sign-extend +// somewhat arbitrarily, although it favors popular hardware architectures +// and is conceptually a simpler operation). +def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>; + +// Conversion from floating point to integer instructions which don't trap on +// overflow or invalid. 
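+// Reference note (wasm spec behavior, added for illustration): the saturating
+// forms never trap. Out-of-range inputs clamp to the integer range and NaN
+// becomes 0; e.g. for i32.trunc_sat_f32_s:
+//
+//   nan   ->  0
+//   +inf  ->  2147483647   (INT32_MAX)
+//   -inf  -> -2147483648   (INT32_MIN)
+//
+// LLVM's fp_to_sint / fp_to_uint leave out-of-range inputs undefined, so these
+// instructions may implement them directly.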
+defm I32_TRUNC_S_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [(set I32:$dst, (fp_to_sint F32:$src))], + "i32.trunc_sat_f32_s\t$dst, $src", + "i32.trunc_sat_f32_s", 0xfc00>, + Requires<[HasNontrappingFPToInt]>; +defm I32_TRUNC_U_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [(set I32:$dst, (fp_to_uint F32:$src))], + "i32.trunc_sat_f32_u\t$dst, $src", + "i32.trunc_sat_f32_u", 0xfc01>, + Requires<[HasNontrappingFPToInt]>; +defm I64_TRUNC_S_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [(set I64:$dst, (fp_to_sint F32:$src))], + "i64.trunc_sat_f32_s\t$dst, $src", + "i64.trunc_sat_f32_s", 0xfc04>, + Requires<[HasNontrappingFPToInt]>; +defm I64_TRUNC_U_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [(set I64:$dst, (fp_to_uint F32:$src))], + "i64.trunc_sat_f32_u\t$dst, $src", + "i64.trunc_sat_f32_u", 0xfc05>, + Requires<[HasNontrappingFPToInt]>; +defm I32_TRUNC_S_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [(set I32:$dst, (fp_to_sint F64:$src))], + "i32.trunc_sat_f64_s\t$dst, $src", + "i32.trunc_sat_f64_s", 0xfc02>, + Requires<[HasNontrappingFPToInt]>; +defm I32_TRUNC_U_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [(set I32:$dst, (fp_to_uint F64:$src))], + "i32.trunc_sat_f64_u\t$dst, $src", + "i32.trunc_sat_f64_u", 0xfc03>, + Requires<[HasNontrappingFPToInt]>; +defm I64_TRUNC_S_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [(set I64:$dst, (fp_to_sint F64:$src))], + "i64.trunc_sat_f64_s\t$dst, $src", + "i64.trunc_sat_f64_s", 0xfc06>, + Requires<[HasNontrappingFPToInt]>; +defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [(set I64:$dst, (fp_to_uint F64:$src))], + "i64.trunc_sat_f64_u\t$dst, $src", + "i64.trunc_sat_f64_u", 0xfc07>, + Requires<[HasNontrappingFPToInt]>; + +// Lower llvm.wasm.trunc.saturate.* to saturating instructions +def : Pat<(int_wasm_trunc_saturate_signed F32:$src), + (I32_TRUNC_S_SAT_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src), + (I32_TRUNC_U_SAT_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_saturate_signed F64:$src), + (I32_TRUNC_S_SAT_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src), + (I32_TRUNC_U_SAT_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_saturate_signed F32:$src), + (I64_TRUNC_S_SAT_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src), + (I64_TRUNC_U_SAT_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_saturate_signed F64:$src), + (I64_TRUNC_S_SAT_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src), + (I64_TRUNC_U_SAT_F64 F64:$src)>; + +// Conversion from floating point to integer pseudo-instructions which don't +// trap on overflow or invalid. 
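+// Sketch (simplified; the actual expansion lives in LowerFPToInt in
+// WebAssemblyISelLowering.cpp, reached via EmitInstrWithCustomInserter): each
+// pseudo becomes an explicit range check around the trapping truncation,
+// conceptually
+//
+//   if (fabs(src) < LIMIT)         ;; LIMIT chosen per dest width/signedness
+//     dst = i32.trunc_f32_s src    ;; cannot trap on this path
+//   else
+//     dst = fallback constant      ;; fp_to_sint is undefined here anyway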
+let usesCustomInserter = 1, isCodeGenOnly = 1 in { +defm FP_TO_SINT_I32_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [(set I32:$dst, (fp_to_sint F32:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_UINT_I32_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [(set I32:$dst, (fp_to_uint F32:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_SINT_I64_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [(set I64:$dst, (fp_to_sint F32:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_UINT_I64_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [(set I64:$dst, (fp_to_uint F32:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_SINT_I32_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [(set I32:$dst, (fp_to_sint F64:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_UINT_I32_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [(set I32:$dst, (fp_to_uint F64:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_SINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [(set I64:$dst, (fp_to_sint F64:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +defm FP_TO_UINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [(set I64:$dst, (fp_to_uint F64:$src))], "", "", 0>, + Requires<[NotHasNontrappingFPToInt]>; +} // usesCustomInserter, isCodeGenOnly = 1 + +// Conversion from floating point to integer traps on overflow and invalid. +let hasSideEffects = 1 in { +defm I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [], "i32.trunc_f32_s\t$dst, $src", "i32.trunc_f32_s", + 0xa8>; +defm I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [], "i32.trunc_f32_u\t$dst, $src", "i32.trunc_f32_u", + 0xa9>; +defm I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [], "i64.trunc_f32_s\t$dst, $src", "i64.trunc_f32_s", + 0xae>; +defm I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins), + [], "i64.trunc_f32_u\t$dst, $src", "i64.trunc_f32_u", + 0xaf>; +defm I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [], "i32.trunc_f64_s\t$dst, $src", "i32.trunc_f64_s", + 0xaa>; +defm I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins), + [], "i32.trunc_f64_u\t$dst, $src", "i32.trunc_f64_u", + 0xab>; +defm I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [], "i64.trunc_f64_s\t$dst, $src", "i64.trunc_f64_s", + 0xb0>; +defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [], "i64.trunc_f64_u\t$dst, $src", "i64.trunc_f64_u", + 0xb1>; +} // hasSideEffects = 1 + +def : Pat<(int_wasm_trunc_signed F32:$src), + (I32_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I32_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I32_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I32_TRUNC_U_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_signed F32:$src), + (I64_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I64_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I64_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I64_TRUNC_U_F64 F64:$src)>; + +defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins), + [(set F32:$dst, (sint_to_fp I32:$src))], + "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s", + 0xb2>; +defm F32_CONVERT_U_I32 : I<(outs 
F32:$dst), (ins I32:$src), (outs), (ins), + [(set F32:$dst, (uint_to_fp I32:$src))], + "f32.convert_i32_u\t$dst, $src", "f32.convert_i32_u", + 0xb3>; +defm F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins), + [(set F64:$dst, (sint_to_fp I32:$src))], + "f64.convert_i32_s\t$dst, $src", "f64.convert_i32_s", + 0xb7>; +defm F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins), + [(set F64:$dst, (uint_to_fp I32:$src))], + "f64.convert_i32_u\t$dst, $src", "f64.convert_i32_u", + 0xb8>; +defm F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins), + [(set F32:$dst, (sint_to_fp I64:$src))], + "f32.convert_i64_s\t$dst, $src", "f32.convert_i64_s", + 0xb4>; +defm F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins), + [(set F32:$dst, (uint_to_fp I64:$src))], + "f32.convert_i64_u\t$dst, $src", "f32.convert_i64_u", + 0xb5>; +defm F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins), + [(set F64:$dst, (sint_to_fp I64:$src))], + "f64.convert_i64_s\t$dst, $src", "f64.convert_i64_s", + 0xb9>; +defm F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins), + [(set F64:$dst, (uint_to_fp I64:$src))], + "f64.convert_i64_u\t$dst, $src", "f64.convert_i64_u", + 0xba>; + +defm F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), (outs), (ins), + [(set F64:$dst, (fpextend F32:$src))], + "f64.promote_f32\t$dst, $src", "f64.promote_f32", + 0xbb>; +defm F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), (outs), (ins), + [(set F32:$dst, (fpround F64:$src))], + "f32.demote_f64\t$dst, $src", "f32.demote_f64", + 0xb6>; + +defm I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins), + [(set I32:$dst, (bitconvert F32:$src))], + "i32.reinterpret_f32\t$dst, $src", + "i32.reinterpret_f32", 0xbc>; +defm F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins), + [(set F32:$dst, (bitconvert I32:$src))], + "f32.reinterpret_i32\t$dst, $src", + "f32.reinterpret_i32", 0xbe>; +defm I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), + [(set I64:$dst, (bitconvert F64:$src))], + "i64.reinterpret_f64\t$dst, $src", + "i64.reinterpret_f64", 0xbd>; +defm F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins), + [(set F64:$dst, (bitconvert I64:$src))], + "f64.reinterpret_i64\t$dst, $src", + "f64.reinterpret_i64", 0xbf>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td new file mode 100644 index 000000000000..5c9b34f44734 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -0,0 +1,130 @@ +// WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Floating-point operand code-gen constructs. 
+/// +//===----------------------------------------------------------------------===// + +multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst, + bits<32> f64Inst> { + defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins), + [(set F32:$dst, (node F32:$src))], + !strconcat("f32.", !strconcat(name, "\t$dst, $src")), + !strconcat("f32.", name), f32Inst>; + defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins), + [(set F64:$dst, (node F64:$src))], + !strconcat("f64.", !strconcat(name, "\t$dst, $src")), + !strconcat("f64.", name), f64Inst>; +} +multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst, + bits<32> f64Inst> { + defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins), + [(set F32:$dst, (node F32:$lhs, F32:$rhs))], + !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("f32.", name), f32Inst>; + defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins), + [(set F64:$dst, (node F64:$lhs, F64:$rhs))], + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("f64.", name), f64Inst>; +} +multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> { + defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins), + [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))], + !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("f32.", name), f32Inst>; + defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins), + [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))], + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("f64.", name), f64Inst>; +} + +let isCommutable = 1 in +defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>; +defm SUB : BinaryFP<fsub, "sub ", 0x93, 0xa1>; +let isCommutable = 1 in +defm MUL : BinaryFP<fmul, "mul ", 0x94, 0xa2>; +defm DIV : BinaryFP<fdiv, "div ", 0x95, 0xa3>; +defm SQRT : UnaryFP<fsqrt, "sqrt", 0x91, 0x9f>; + +defm ABS : UnaryFP<fabs, "abs ", 0x8b, 0x99>; +defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>; +defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>; + +let isCommutable = 1 in { +defm MIN : BinaryFP<fminimum, "min ", 0x96, 0xa4>; +defm MAX : BinaryFP<fmaximum, "max ", 0x97, 0xa5>; +} // isCommutable = 1 + +defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>; +defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>; +defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>; +defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>; + +// DAGCombine oddly folds casts into the rhs of copysign. Unfold them. +def : Pat<(fcopysign F64:$lhs, F32:$rhs), + (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>; +def : Pat<(fcopysign F32:$lhs, F64:$rhs), + (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>; + +// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. +def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>; +def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>; + +let isCommutable = 1 in { +defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>; +defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>; +} // isCommutable = 1 +defm LT : ComparisonFP<SETOLT, "lt ", 0x5d, 0x63>; +defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>; +defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>; +defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>; + +// Don't care floating-point comparisons, supported via other comparisons. 
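+// ("Don't care" refers to condition codes with unspecified NaN ordering:
+// ISD::SETEQ as opposed to SETOEQ / SETUEQ. Because the behavior on NaN
+// operands is unconstrained, resolving each one to the ordered or unordered
+// instruction above is legal; e.g. (seteq x, y) may be treated as "ordered
+// and equal", so it maps onto f32.eq / f64.eq.)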
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>; +def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>; +def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>; + +defm SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond), + (outs), (ins), + [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))], + "f32.select\t$dst, $lhs, $rhs, $cond", "f32.select", 0x1b>; +defm SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond), + (outs), (ins), + [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))], + "f64.select\t$dst, $lhs, $rhs, $cond", "f64.select", 0x1b>; + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs), + (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>; +def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs), + (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>; + +// And again, this time with seteq instead of setne and the arms reversed. +def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs), + (SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs), + (SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>; + +// The legalizer inserts an unnecessary `and 1` to make input conform +// to getBooleanContents, which we can lower away. +def : Pat<(select (i32 (and I32:$cond, 1)), F32:$lhs, F32:$rhs), + (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>; +def : Pat<(select (i32 (and I32:$cond, 1)), F64:$lhs, F64:$rhs), + (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td new file mode 100644 index 000000000000..aff4d20d8d82 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -0,0 +1,66 @@ +//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly instruction format definitions. +/// +//===----------------------------------------------------------------------===// + +// WebAssembly Instruction Format. +// We instantiate 2 of these for every actual instruction (register based +// and stack based), see below. +class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel, + Instruction { + bits<32> Inst = inst; // Instruction encoding. 
+  string StackBased = stack;
+  string BaseName = NAME;
+  let Namespace = "WebAssembly";
+  let Pattern = [];
+  let AsmString = asmstr;
+  // When there are multiple instructions that map to the same encoding (e.g.
+  // in the disassembler use case), prefer the one where IsCanonical == 1.
+  bit IsCanonical = 0;
+}
+
+// Normal instructions. Default instantiation of a WebAssemblyInst.
+class NI<dag oops, dag iops, list<dag> pattern, string stack,
+         string asmstr = "", bits<32> inst = -1>
+    : WebAssemblyInst<inst, asmstr, stack> {
+  dag OutOperandList = oops;
+  dag InOperandList = iops;
+  let Pattern = pattern;
+  let Defs = [ARGUMENTS];
+}
+
+// Generates both register and stack based versions of one actual instruction.
+// We have 2 sets of operands (oops & iops) for the register and stack
+// based version of this instruction, as well as the corresponding asmstr.
+// The register versions have virtual-register operands which correspond to wasm
+// locals or stack locations. Each use and def of the register corresponds to an
+// implicit local.get / local.set or access of stack operands in wasm. These
+// instructions are used for ISel and all MI passes. The stack versions of the
+// instructions do not have register operands (they implicitly operate on the
+// stack), and local.gets and local.sets are explicit. The register instructions
+// are converted to their corresponding stack instructions before lowering to
+// MC.
+// Every instruction should be defined via this multiclass to guarantee
+// there is always an equivalent pair of instructions.
+multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+             list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
+             bits<32> inst = -1> {
+  let isCodeGenOnly = 1 in
+  def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+  let BaseName = NAME in
+  def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
+}
+
+// For instructions that have no register ops; both operand sets are the same.
+multiclass NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
+               bits<32> inst = -1> {
+  defm "": I<oops, iops, oops, iops, pattern, asmstr, asmstr, inst>;
+} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp new file mode 100644 index 000000000000..8e8126c90e72 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -0,0 +1,232 @@
+//===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the WebAssembly implementation of the
+/// TargetInstrInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyInstrInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-instr-info"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "WebAssemblyGenInstrInfo.inc"
+
+// defines WebAssembly::getNamedOperandIdx
+#define GET_INSTRINFO_NAMED_OPS
+#include "WebAssemblyGenInstrInfo.inc"
+
+WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
+    : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
+                              WebAssembly::ADJCALLSTACKUP,
+                              WebAssembly::CATCHRET),
+      RI(STI.getTargetTriple()) {}
+
+bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
+    const MachineInstr &MI, AAResults *AA) const {
+  switch (MI.getOpcode()) {
+  case WebAssembly::CONST_I32:
+  case WebAssembly::CONST_I64:
+  case WebAssembly::CONST_F32:
+  case WebAssembly::CONST_F64:
+    // isReallyTriviallyReMaterializableGeneric misses these because of the
+    // ARGUMENTS implicit def, so we manually override it here.
+    return true;
+  default:
+    return false;
+  }
+}
+
+void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       const DebugLoc &DL, unsigned DestReg,
+                                       unsigned SrcReg, bool KillSrc) const {
+  // This method is called by post-RA expansion, which expects only pregs to
+  // exist. However, we need to handle both here.
+  auto &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+      Register::isVirtualRegister(DestReg)
+          ? MRI.getRegClass(DestReg)
+          : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg);
+
+  unsigned CopyOpcode;
+  if (RC == &WebAssembly::I32RegClass)
+    CopyOpcode = WebAssembly::COPY_I32;
+  else if (RC == &WebAssembly::I64RegClass)
+    CopyOpcode = WebAssembly::COPY_I64;
+  else if (RC == &WebAssembly::F32RegClass)
+    CopyOpcode = WebAssembly::COPY_F32;
+  else if (RC == &WebAssembly::F64RegClass)
+    CopyOpcode = WebAssembly::COPY_F64;
+  else if (RC == &WebAssembly::V128RegClass)
+    CopyOpcode = WebAssembly::COPY_V128;
+  else if (RC == &WebAssembly::EXNREFRegClass)
+    CopyOpcode = WebAssembly::COPY_EXNREF;
+  else
+    llvm_unreachable("Unexpected register class");
+
+  BuildMI(MBB, I, DL, get(CopyOpcode), DestReg)
+      .addReg(SrcReg, KillSrc ? RegState::Kill : 0);
+}
+
+MachineInstr *WebAssemblyInstrInfo::commuteInstructionImpl(
+    MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const {
+  // If the operands are stackified, we can't reorder them.
+  WebAssemblyFunctionInfo &MFI =
+      *MI.getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
+  if (MFI.isVRegStackified(MI.getOperand(OpIdx1).getReg()) ||
+      MFI.isVRegStackified(MI.getOperand(OpIdx2).getReg()))
+    return nullptr;
+
+  // Otherwise use the default implementation.
+  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
+// Branch analysis.
+bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                         MachineBasicBlock *&TBB,
+                                         MachineBasicBlock *&FBB,
+                                         SmallVectorImpl<MachineOperand> &Cond,
+                                         bool /*AllowModify*/) const {
+  const auto &MFI = *MBB.getParent()->getInfo<WebAssemblyFunctionInfo>();
+  // WebAssembly has control flow that doesn't have explicit branches or direct
+  // fallthrough (e.g.
try/catch), which can't be modeled by analyzeBranch. It + // is created after CFGStackify. + if (MFI.isCFGStackified()) + return true; + + bool HaveCond = false; + for (MachineInstr &MI : MBB.terminators()) { + switch (MI.getOpcode()) { + default: + // Unhandled instruction; bail out. + return true; + case WebAssembly::BR_IF: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(1)); + TBB = MI.getOperand(0).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); + Cond.push_back(MI.getOperand(1)); + TBB = MI.getOperand(0).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR: + if (!HaveCond) + TBB = MI.getOperand(0).getMBB(); + else + FBB = MI.getOperand(0).getMBB(); + break; + case WebAssembly::BR_ON_EXN: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(2)); + TBB = MI.getOperand(0).getMBB(); + HaveCond = true; + break; + } + if (MI.isBarrier()) + break; + } + + return false; +} + +unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + assert(!BytesRemoved && "code size not handled"); + + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + unsigned Count = 0; + + while (I != MBB.instr_begin()) { + --I; + if (I->isDebugInstr()) + continue; + if (!I->isTerminator()) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.instr_end(); + ++Count; + } + + return Count; +} + +unsigned WebAssemblyInstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { + assert(!BytesAdded && "code size not handled"); + + if (Cond.empty()) { + if (!TBB) + return 0; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB); + return 1; + } + + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + MachineFunction &MF = *MBB.getParent(); + auto &MRI = MF.getRegInfo(); + bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) == + &WebAssembly::EXNREFRegClass; + + if (Cond[0].getImm()) { + if (IsBrOnExn) { + const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception"); + BuildMI(&MBB, DL, get(WebAssembly::BR_ON_EXN)) + .addMBB(TBB) + .addExternalSymbol(CPPExnSymbol) + .add(Cond[1]); + } else + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]); + } else { + assert(!IsBrOnExn && "br_on_exn does not have a reversed condition"); + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]); + } + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB); + return 2; +} + +bool WebAssemblyInstrInfo::reverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Expected a flag and a condition expression"); + + // br_on_exn's condition cannot be reversed + MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent(); + auto &MRI = MF.getRegInfo(); + if (Cond[1].isReg() && + MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass) + return true; + + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h new file mode 100644 index 000000000000..fe6211663c31 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ 
-0,0 +1,71 @@ +//=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the WebAssembly implementation of the +/// TargetInstrInfo class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H + +#include "WebAssemblyRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "WebAssemblyGenInstrInfo.inc" + +#define GET_INSTRINFO_OPERAND_ENUM +#include "WebAssemblyGenInstrInfo.inc" + +namespace llvm { + +namespace WebAssembly { + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +} + +class WebAssemblySubtarget; + +class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { + const WebAssemblyRegisterInfo RI; + +public: + explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI); + + const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } + + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const override; + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const override; + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + bool + reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td new file mode 100644 index 000000000000..044901481381 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -0,0 +1,350 @@ +// WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Instruction definitions. +/// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// WebAssembly Instruction Predicate Definitions. 
+//===----------------------------------------------------------------------===// + +def IsPIC : Predicate<"TM.isPositionIndependent()">; +def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; + +def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">; + +def HasAddr64 : Predicate<"Subtarget->hasAddr64()">; + +def HasSIMD128 : + Predicate<"Subtarget->hasSIMD128()">, + AssemblerPredicate<"FeatureSIMD128", "simd128">; + +def HasUnimplementedSIMD128 : + Predicate<"Subtarget->hasUnimplementedSIMD128()">, + AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">; + +def HasAtomics : + Predicate<"Subtarget->hasAtomics()">, + AssemblerPredicate<"FeatureAtomics", "atomics">; + +def HasMultivalue : + Predicate<"Subtarget->hasMultivalue()">, + AssemblerPredicate<"FeatureMultivalue", "multivalue">; + +def HasNontrappingFPToInt : + Predicate<"Subtarget->hasNontrappingFPToInt()">, + AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">; + +def NotHasNontrappingFPToInt : + Predicate<"!Subtarget->hasNontrappingFPToInt()">, + AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">; + +def HasSignExt : + Predicate<"Subtarget->hasSignExt()">, + AssemblerPredicate<"FeatureSignExt", "sign-ext">; + +def HasTailCall : + Predicate<"Subtarget->hasTailCall()">, + AssemblerPredicate<"FeatureTailCall", "tail-call">; + +def HasExceptionHandling : + Predicate<"Subtarget->hasExceptionHandling()">, + AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">; + +def HasBulkMemory : + Predicate<"Subtarget->hasBulkMemory()">, + AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">; + +//===----------------------------------------------------------------------===// +// WebAssembly-specific DAG Node Types. +//===----------------------------------------------------------------------===// + +def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>, + SDTCisVT<1, iPTR>]>; +def SDT_WebAssemblyCallSeqEnd : + SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; +def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; +def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; + +//===----------------------------------------------------------------------===// +// WebAssembly-specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def WebAssemblycallseq_start : + SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def WebAssemblycallseq_end : + SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0", + SDT_WebAssemblyCall0, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1", + SDT_WebAssemblyCall1, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblyretcall : SDNode<"WebAssemblyISD::RET_CALL", + SDT_WebAssemblyCall0, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", + SDT_WebAssemblyBrTable, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", + SDT_WebAssemblyArgument>; +def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", + SDT_WebAssemblyReturn, + [SDNPHasChain, SDNPVariadic]>; +def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", + SDT_WebAssemblyWrapper>; +def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC", + SDT_WebAssemblyWrapperPIC>; +def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow, + [SDNPHasChain, SDNPVariadic]>; + +//===----------------------------------------------------------------------===// +// WebAssembly-specific Operands. +//===----------------------------------------------------------------------===// + +// Default Operand has AsmOperandClass "Imm" which is for integers (and +// symbols), so specialize one for floats: +def FPImmAsmOperand : AsmOperandClass { + let Name = "FPImm"; + let PredicateMethod = "isFPImm"; +} + +class FPOperand<ValueType ty> : Operand<ty> { + AsmOperandClass ParserMatchClass = FPImmAsmOperand; +} + +let OperandNamespace = "WebAssembly" in { + +let OperandType = "OPERAND_BASIC_BLOCK" in +def bb_op : Operand<OtherVT>; + +let OperandType = "OPERAND_LOCAL" in +def local_op : Operand<i32>; + +let OperandType = "OPERAND_GLOBAL" in +def global_op : Operand<i32>; + +let OperandType = "OPERAND_I32IMM" in +def i32imm_op : Operand<i32>; + +let OperandType = "OPERAND_I64IMM" in +def i64imm_op : Operand<i64>; + +let OperandType = "OPERAND_F32IMM" in +def f32imm_op : FPOperand<f32>; + +let OperandType = "OPERAND_F64IMM" in +def f64imm_op : FPOperand<f64>; + +let OperandType = "OPERAND_VEC_I8IMM" in +def vec_i8imm_op : Operand<i32>; + +let OperandType = "OPERAND_VEC_I16IMM" in +def vec_i16imm_op : Operand<i32>; + +let OperandType = "OPERAND_VEC_I32IMM" in +def vec_i32imm_op : Operand<i32>; + +let OperandType = "OPERAND_VEC_I64IMM" in +def vec_i64imm_op : Operand<i64>; + +let OperandType = "OPERAND_FUNCTION32" in +def function32_op : Operand<i32>; + +let OperandType = "OPERAND_OFFSET32" in +def offset32_op : Operand<i32>; + +let OperandType = "OPERAND_P2ALIGN" in { +def P2Align : Operand<i32> { + let PrintMethod = "printWebAssemblyP2AlignOperand"; +} + +let OperandType = "OPERAND_EVENT" in +def event_op : Operand<i32>; + +} // OperandType = "OPERAND_P2ALIGN" + +let OperandType = "OPERAND_SIGNATURE" in +def Signature : Operand<i32> { + let PrintMethod = "printWebAssemblySignatureOperand"; +} + +let OperandType = "OPERAND_TYPEINDEX" in +def TypeIndex : Operand<i32>; + +} // OperandNamespace = "WebAssembly" + +//===----------------------------------------------------------------------===// +// WebAssembly Register to Stack instruction mapping 
+//===----------------------------------------------------------------------===// + +class StackRel; +def getStackOpcode : InstrMapping { + let FilterClass = "StackRel"; + let RowFields = ["BaseName"]; + let ColFields = ["StackBased"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// WebAssembly Instruction Format Definitions. +//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Additional instructions. +//===----------------------------------------------------------------------===// + +multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> { + let hasSideEffects = 1, isCodeGenOnly = 1, Defs = []<Register>, + Uses = [ARGUMENTS] in + defm ARGUMENT_#vt : + I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno), + [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>; +} +defm "": ARGUMENT<I32, i32>; +defm "": ARGUMENT<I64, i64>; +defm "": ARGUMENT<F32, f32>; +defm "": ARGUMENT<F64, f64>; +defm "": ARGUMENT<EXNREF, exnref>; + +// local.get and local.set are not generated by instruction selection; they +// are implied by virtual register uses and defs. +multiclass LOCAL<WebAssemblyRegClass vt> { + let hasSideEffects = 0 in { + // COPY is not an actual instruction in wasm, but since we allow local.get and + // local.set to be implicit during most of codegen, we can have a COPY which + // is actually a no-op because all the work is done in the implied local.get + // and local.set. COPYs are eliminated (and replaced with + // local.get/local.set) in the ExplicitLocals pass. + let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in + defm COPY_#vt : I<(outs vt:$res), (ins vt:$src), (outs), (ins), [], + "local.copy\t$res, $src", "local.copy">; + + // TEE is similar to COPY, but writes two copies of its result. Typically + // this would be used to stackify one result and write the other result to a + // local. + let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in + defm TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), (outs), (ins), [], + "local.tee\t$res, $also, $src", "local.tee">; + + // This is the actual local.get instruction in wasm. These are made explicit + // by the ExplicitLocals pass. It has mayLoad because it reads from a wasm + // local, which is a side effect not otherwise modeled in LLVM. + let mayLoad = 1, isAsCheapAsAMove = 1 in + defm LOCAL_GET_#vt : I<(outs vt:$res), (ins local_op:$local), + (outs), (ins local_op:$local), [], + "local.get\t$res, $local", "local.get\t$local", 0x20>; + + // This is the actual local.set instruction in wasm. These are made explicit + // by the ExplicitLocals pass. It has mayStore because it writes to a wasm + // local, which is a side effect not otherwise modeled in LLVM. + let mayStore = 1, isAsCheapAsAMove = 1 in + defm LOCAL_SET_#vt : I<(outs), (ins local_op:$local, vt:$src), + (outs), (ins local_op:$local), [], + "local.set\t$local, $src", "local.set\t$local", 0x21>; + + // This is the actual local.tee instruction in wasm. TEEs are turned into + // LOCAL_TEEs by the ExplicitLocals pass. It has mayStore for the same reason + // as LOCAL_SET. 
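+// Illustrative stack-form sketch (not from the original source):
+//
+//   i32.const 5
+//   local.tee 0    ;; writes 5 to local 0 *and* leaves 5 on the value stack
+//
+// i.e. local.tee acts like local.set followed by local.get of the same local,
+// which is what lets TEE stackify one use of a value while storing the other.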
+ let mayStore = 1, isAsCheapAsAMove = 1 in + defm LOCAL_TEE_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src), + (outs), (ins local_op:$local), [], + "local.tee\t$res, $local, $src", "local.tee\t$local", + 0x22>; + + // Unused values must be dropped in some contexts. + defm DROP_#vt : I<(outs), (ins vt:$src), (outs), (ins), [], + "drop\t$src", "drop", 0x1a>; + + let mayLoad = 1 in + defm GLOBAL_GET_#vt : I<(outs vt:$res), (ins global_op:$local), + (outs), (ins global_op:$local), [], + "global.get\t$res, $local", "global.get\t$local", + 0x23>; + + let mayStore = 1 in + defm GLOBAL_SET_#vt : I<(outs), (ins global_op:$local, vt:$src), + (outs), (ins global_op:$local), [], + "global.set\t$local, $src", "global.set\t$local", + 0x24>; + +} // hasSideEffects = 0 +} +defm "" : LOCAL<I32>; +defm "" : LOCAL<I64>; +defm "" : LOCAL<F32>; +defm "" : LOCAL<F64>; +defm "" : LOCAL<V128>, Requires<[HasSIMD128]>; +defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>; + +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { +defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), + (outs), (ins i32imm_op:$imm), + [(set I32:$res, imm:$imm)], + "i32.const\t$res, $imm", "i32.const\t$imm", 0x41>; +defm CONST_I64 : I<(outs I64:$res), (ins i64imm_op:$imm), + (outs), (ins i64imm_op:$imm), + [(set I64:$res, imm:$imm)], + "i64.const\t$res, $imm", "i64.const\t$imm", 0x42>; +defm CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm), + (outs), (ins f32imm_op:$imm), + [(set F32:$res, fpimm:$imm)], + "f32.const\t$res, $imm", "f32.const\t$imm", 0x43>; +defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), + (outs), (ins f64imm_op:$imm), + [(set F64:$res, fpimm:$imm)], + "f64.const\t$res, $imm", "f64.const\t$imm", 0x44>; +} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 + +def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), + (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>; + +def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)), + (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + +def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)), + (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>; + +def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), + (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>; + +def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)), + (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>; + +def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>; +def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>; + +//===----------------------------------------------------------------------===// +// Additional sets of instructions. 
+//===----------------------------------------------------------------------===// + +include "WebAssemblyInstrMemory.td" +include "WebAssemblyInstrCall.td" +include "WebAssemblyInstrControl.td" +include "WebAssemblyInstrInteger.td" +include "WebAssemblyInstrConv.td" +include "WebAssemblyInstrFloat.td" +include "WebAssemblyInstrAtomics.td" +include "WebAssemblyInstrSIMD.td" +include "WebAssemblyInstrRef.td" +include "WebAssemblyInstrBulkMemory.td" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td new file mode 100644 index 000000000000..18250cf8ef85 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -0,0 +1,123 @@ +// WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Integer operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst, + bits<32> i64Inst> { + defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins), + [(set I32:$dst, (node I32:$src))], + !strconcat("i32.", !strconcat(name, "\t$dst, $src")), + !strconcat("i32.", name), i32Inst>; + defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins), + [(set I64:$dst, (node I64:$src))], + !strconcat("i64.", !strconcat(name, "\t$dst, $src")), + !strconcat("i64.", name), i64Inst>; +} +multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst, + bits<32> i64Inst> { + defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins), + [(set I32:$dst, (node I32:$lhs, I32:$rhs))], + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("i32.", name), i32Inst>; + defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins), + [(set I64:$dst, (node I64:$lhs, I64:$rhs))], + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("i64.", name), i64Inst>; +} +multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> { + defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins), + [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))], + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("i32.", name), i32Inst>; + defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins), + [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))], + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + !strconcat("i64.", name), i64Inst>; +} + +// The spaces after the names are for aesthetic purposes only, to make +// operands line up vertically after tab expansion. +let isCommutable = 1 in +defm ADD : BinaryInt<add, "add ", 0x6a, 0x7c>; +defm SUB : BinaryInt<sub, "sub ", 0x6b, 0x7d>; +let isCommutable = 1 in +defm MUL : BinaryInt<mul, "mul ", 0x6c, 0x7e>; +// Divide and remainder trap on a zero denominator. 
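+// (Per the wasm spec, signed division also traps when the result is
+// unrepresentable: i32.div_s of -2147483648 by -1. rem_s of those operands
+// yields 0 rather than trapping. This trapping is why the group below is
+// marked hasSideEffects, preventing speculation and hoisting.)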
+let hasSideEffects = 1 in { +defm DIV_S : BinaryInt<sdiv, "div_s", 0x6d, 0x7f>; +defm DIV_U : BinaryInt<udiv, "div_u", 0x6e, 0x80>; +defm REM_S : BinaryInt<srem, "rem_s", 0x6f, 0x81>; +defm REM_U : BinaryInt<urem, "rem_u", 0x70, 0x82>; +} // hasSideEffects = 1 +let isCommutable = 1 in { +defm AND : BinaryInt<and, "and ", 0x71, 0x83>; +defm OR : BinaryInt<or, "or ", 0x72, 0x84>; +defm XOR : BinaryInt<xor, "xor ", 0x73, 0x85>; +} // isCommutable = 1 +defm SHL : BinaryInt<shl, "shl ", 0x74, 0x86>; +defm SHR_S : BinaryInt<sra, "shr_s", 0x75, 0x87>; +defm SHR_U : BinaryInt<srl, "shr_u", 0x76, 0x88>; +defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>; +defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>; + +let isCommutable = 1 in { +defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x51>; +defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x52>; +} // isCommutable = 1 +defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>; +defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>; +defm GT_S : ComparisonInt<SETGT, "gt_s", 0x4a, 0x55>; +defm GT_U : ComparisonInt<SETUGT, "gt_u", 0x4b, 0x56>; +defm LE_S : ComparisonInt<SETLE, "le_s", 0x4c, 0x57>; +defm LE_U : ComparisonInt<SETULE, "le_u", 0x4d, 0x58>; +defm GE_S : ComparisonInt<SETGE, "ge_s", 0x4e, 0x59>; +defm GE_U : ComparisonInt<SETUGE, "ge_u", 0x4f, 0x5a>; + +defm CLZ : UnaryInt<ctlz, "clz ", 0x67, 0x79>; +defm CTZ : UnaryInt<cttz, "ctz ", 0x68, 0x7a>; +defm POPCNT : UnaryInt<ctpop, "popcnt", 0x69, 0x7b>; + +defm EQZ_I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins), + [(set I32:$dst, (setcc I32:$src, 0, SETEQ))], + "i32.eqz \t$dst, $src", "i32.eqz", 0x45>; +defm EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins), + [(set I32:$dst, (setcc I64:$src, 0, SETEQ))], + "i64.eqz \t$dst, $src", "i64.eqz", 0x50>; + +// Optimize away an explicit mask on a rotate count. +def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>; +def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>; +def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>; +def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>; + +defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond), + (outs), (ins), + [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))], + "i32.select\t$dst, $lhs, $rhs, $cond", "i32.select", 0x1b>; +defm SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond), + (outs), (ins), + [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))], + "i64.select\t$dst, $lhs, $rhs, $cond", "i64.select", 0x1b>; + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs), + (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>; +def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs), + (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>; + +// And again, this time with seteq instead of setne and the arms reversed. 
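+// Illustrative DAG identity behind the swap (assuming no other users of the
+// setcc node):
+//
+//   (select (seteq $c, 0), $a, $b)  ==  (select (setne $c, 0), $b, $a)
+//
+// so the i32 condition feeds wasm's "any nonzero value is true" select
+// directly and no i32.eqz needs to be materialized.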
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs), + (SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs), + (SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td new file mode 100644 index 000000000000..eba9b80d3286 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -0,0 +1,320 @@ +// WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly Memory operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +// TODO: +// - HasAddr64 +// - WebAssemblyTargetLowering having to do with atomics +// - Each has optional alignment. + +// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 +// local types. These memory-only types instead zero- or sign-extend into local +// types when loading, and truncate when storing. + +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$addr, node:$off), + (add node:$addr, node:$off), + [{ return N->getFlags().hasNoUnsignedWrap(); }]>; + +// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + + KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); + KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); + return (~Known0.Zero & ~Known1.Zero) == 0; +}]>; + +// We don't need a regPlusES because external symbols never have constant +// offsets folded into them, so we can just use add. + +// Defines atomic and non-atomic loads, regular and extending. +multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> { + let mayLoad = 1, UseNamedOperandTable = 1 in + defm "": I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), + [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), + !strconcat(Name, "\t${off}${p2align}"), Opcode>; +} + +// Basic load. +// FIXME: When we can break syntax compatibility, reorder the fields in the +// asmstrings to match the binary encoding. +defm LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>; +defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>; +defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>; +defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>; + +// Select loads with no constant offset. +class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>; + +def : LoadPatNoOffset<i32, load, LOAD_I32>; +def : LoadPatNoOffset<i64, load, LOAD_I64>; +def : LoadPatNoOffset<f32, load, LOAD_F32>; +def : LoadPatNoOffset<f64, load, LOAD_F64>; + + +// Select loads with a constant offset. 
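+// For example (illustrative IR, not from the original source): a load from
+// "(add nuw %p, 16)" folds the immediate into the wasm offset field,
+//
+//   i32.load $dst, 16($p)
+//
+// The nuw requirement comes from regPlusImm above: wasm adds the offset with
+// effectively infinite unsigned precision, so a wrapping add must not fold.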
+ +// Pattern with address + immediate offset +class LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : + Pat<(ty (kind (operand I32:$addr, imm:$off))), (inst 0, imm:$off, I32:$addr)>; + +def : LoadPatImmOff<i32, load, regPlusImm, LOAD_I32>; +def : LoadPatImmOff<i64, load, regPlusImm, LOAD_I64>; +def : LoadPatImmOff<f32, load, regPlusImm, LOAD_F32>; +def : LoadPatImmOff<f64, load, regPlusImm, LOAD_F64>; +def : LoadPatImmOff<i32, load, or_is_add, LOAD_I32>; +def : LoadPatImmOff<i64, load, or_is_add, LOAD_I64>; +def : LoadPatImmOff<f32, load, or_is_add, LOAD_F32>; +def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>; + +// Select loads with just a constant offset. +class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; + +def : LoadPatOffsetOnly<i32, load, LOAD_I32>; +def : LoadPatOffsetOnly<i64, load, LOAD_I64>; +def : LoadPatOffsetOnly<f32, load, LOAD_F32>; +def : LoadPatOffsetOnly<f64, load, LOAD_F64>; + +class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))), + (inst 0, tglobaladdr:$off, (CONST_I32 0))>, Requires<[IsNotPIC]>; + +def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>; +def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>; +def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>; +def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>; + +// Extending load. +defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>; +defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>; +defm LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>; +defm LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>; +defm LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>; +defm LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>; +defm LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>; +defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>; +defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>; +defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>; + +// Select extending loads with no constant offset. +def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatNoOffset<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatNoOffset<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatNoOffset<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatNoOffset<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatNoOffset<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatNoOffset<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatNoOffset<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatNoOffset<i64, zextloadi32, LOAD32_U_I64>; + +// Select extending loads with a constant offset. 
+def : LoadPatImmOff<i32, sextloadi8, regPlusImm, LOAD8_S_I32>; +def : LoadPatImmOff<i32, zextloadi8, regPlusImm, LOAD8_U_I32>; +def : LoadPatImmOff<i32, sextloadi16, regPlusImm, LOAD16_S_I32>; +def : LoadPatImmOff<i32, zextloadi16, regPlusImm, LOAD16_U_I32>; +def : LoadPatImmOff<i64, sextloadi8, regPlusImm, LOAD8_S_I64>; +def : LoadPatImmOff<i64, zextloadi8, regPlusImm, LOAD8_U_I64>; +def : LoadPatImmOff<i64, sextloadi16, regPlusImm, LOAD16_S_I64>; +def : LoadPatImmOff<i64, zextloadi16, regPlusImm, LOAD16_U_I64>; +def : LoadPatImmOff<i64, sextloadi32, regPlusImm, LOAD32_S_I64>; +def : LoadPatImmOff<i64, zextloadi32, regPlusImm, LOAD32_U_I64>; + +def : LoadPatImmOff<i32, sextloadi8, or_is_add, LOAD8_S_I32>; +def : LoadPatImmOff<i32, zextloadi8, or_is_add, LOAD8_U_I32>; +def : LoadPatImmOff<i32, sextloadi16, or_is_add, LOAD16_S_I32>; +def : LoadPatImmOff<i32, zextloadi16, or_is_add, LOAD16_U_I32>; +def : LoadPatImmOff<i64, sextloadi8, or_is_add, LOAD8_S_I64>; +def : LoadPatImmOff<i64, zextloadi8, or_is_add, LOAD8_U_I64>; +def : LoadPatImmOff<i64, sextloadi16, or_is_add, LOAD16_S_I64>; +def : LoadPatImmOff<i64, zextloadi16, or_is_add, LOAD16_U_I64>; +def : LoadPatImmOff<i64, sextloadi32, or_is_add, LOAD32_S_I64>; +def : LoadPatImmOff<i64, zextloadi32, or_is_add, LOAD32_U_I64>; + +// Select extending loads with just a constant offset. +def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatOffsetOnly<i32, zextloadi16, LOAD16_U_I32>; + +def : LoadPatOffsetOnly<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatOffsetOnly<i64, zextloadi32, LOAD32_U_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, sextloadi8, LOAD8_S_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zextloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, sextloadi16, LOAD16_S_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zextloadi16, LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi8, LOAD8_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi16, LOAD16_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. +def : LoadPatNoOffset<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatNoOffset<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatNoOffset<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatNoOffset<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatNoOffset<i64, extloadi32, LOAD32_U_I64>; + +// Select "don't care" extending loads with a constant offset. 
+def : LoadPatImmOff<i32, extloadi8, regPlusImm, LOAD8_U_I32>; +def : LoadPatImmOff<i32, extloadi16, regPlusImm, LOAD16_U_I32>; +def : LoadPatImmOff<i64, extloadi8, regPlusImm, LOAD8_U_I64>; +def : LoadPatImmOff<i64, extloadi16, regPlusImm, LOAD16_U_I64>; +def : LoadPatImmOff<i64, extloadi32, regPlusImm, LOAD32_U_I64>; +def : LoadPatImmOff<i32, extloadi8, or_is_add, LOAD8_U_I32>; +def : LoadPatImmOff<i32, extloadi16, or_is_add, LOAD16_U_I32>; +def : LoadPatImmOff<i64, extloadi8, or_is_add, LOAD8_U_I64>; +def : LoadPatImmOff<i64, extloadi16, or_is_add, LOAD16_U_I64>; +def : LoadPatImmOff<i64, extloadi32, or_is_add, LOAD32_U_I64>; + +// Select "don't care" extending loads with just a constant offset. +def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, extloadi32, LOAD32_U_I64>; +def : LoadPatGlobalAddrOffOnly<i32, extloadi8, LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>; + +// Defines atomic and non-atomic stores, regular and truncating +multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> { + let mayStore = 1, UseNamedOperandTable = 1 in + defm "" : I<(outs), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val), + (outs), + (ins P2Align:$p2align, offset32_op:$off), [], + !strconcat(Name, "\t${off}(${addr})${p2align}, $val"), + !strconcat(Name, "\t${off}${p2align}"), Opcode>; +} +// Basic store. +// Note: WebAssembly inverts SelectionDAG's usual operand order. +defm STORE_I32 : WebAssemblyStore<I32, "i32.store", 0x36>; +defm STORE_I64 : WebAssemblyStore<I64, "i64.store", 0x37>; +defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>; +defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>; + +// Select stores with no constant offset. +class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> : + Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>; + +def : StorePatNoOffset<i32, store, STORE_I32>; +def : StorePatNoOffset<i64, store, STORE_I64>; +def : StorePatNoOffset<f32, store, STORE_F32>; +def : StorePatNoOffset<f64, store, STORE_F64>; + +// Select stores with a constant offset. +class StorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> : + Pat<(kind ty:$val, (operand I32:$addr, imm:$off)), + (inst 0, imm:$off, I32:$addr, ty:$val)>; + +def : StorePatImmOff<i32, store, regPlusImm, STORE_I32>; +def : StorePatImmOff<i64, store, regPlusImm, STORE_I64>; +def : StorePatImmOff<f32, store, regPlusImm, STORE_F32>; +def : StorePatImmOff<f64, store, regPlusImm, STORE_F64>; +def : StorePatImmOff<i32, store, or_is_add, STORE_I32>; +def : StorePatImmOff<i64, store, or_is_add, STORE_I64>; +def : StorePatImmOff<f32, store, or_is_add, STORE_F32>; +def : StorePatImmOff<f64, store, or_is_add, STORE_F64>; + +// Select stores with just a constant offset. 
+class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; +def : StorePatOffsetOnly<i32, store, STORE_I32>; +def : StorePatOffsetOnly<i64, store, STORE_I64>; +def : StorePatOffsetOnly<f32, store, STORE_F32>; +def : StorePatOffsetOnly<f64, store, STORE_F64>; + +class StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> : + Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), + (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, Requires<[IsNotPIC]>; +def : StorePatGlobalAddrOffOnly<i32, store, STORE_I32>; +def : StorePatGlobalAddrOffOnly<i64, store, STORE_I64>; +def : StorePatGlobalAddrOffOnly<f32, store, STORE_F32>; +def : StorePatGlobalAddrOffOnly<f64, store, STORE_F64>; + +// Truncating store. +defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>; +defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>; +defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>; +defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>; +defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>; + +// Select truncating stores with no constant offset. +def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>; +def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>; +def : StorePatNoOffset<i64, truncstorei8, STORE8_I64>; +def : StorePatNoOffset<i64, truncstorei16, STORE16_I64>; +def : StorePatNoOffset<i64, truncstorei32, STORE32_I64>; + +// Select truncating stores with a constant offset. +def : StorePatImmOff<i32, truncstorei8, regPlusImm, STORE8_I32>; +def : StorePatImmOff<i32, truncstorei16, regPlusImm, STORE16_I32>; +def : StorePatImmOff<i64, truncstorei8, regPlusImm, STORE8_I64>; +def : StorePatImmOff<i64, truncstorei16, regPlusImm, STORE16_I64>; +def : StorePatImmOff<i64, truncstorei32, regPlusImm, STORE32_I64>; +def : StorePatImmOff<i32, truncstorei8, or_is_add, STORE8_I32>; +def : StorePatImmOff<i32, truncstorei16, or_is_add, STORE16_I32>; +def : StorePatImmOff<i64, truncstorei8, or_is_add, STORE8_I64>; +def : StorePatImmOff<i64, truncstorei16, or_is_add, STORE16_I64>; +def : StorePatImmOff<i64, truncstorei32, or_is_add, STORE32_I64>; + +// Select truncating stores with just a constant offset. +def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>; +def : StorePatOffsetOnly<i32, truncstorei16, STORE16_I32>; +def : StorePatOffsetOnly<i64, truncstorei8, STORE8_I64>; +def : StorePatOffsetOnly<i64, truncstorei16, STORE16_I64>; +def : StorePatOffsetOnly<i64, truncstorei32, STORE32_I64>; +def : StorePatGlobalAddrOffOnly<i32, truncstorei8, STORE8_I32>; +def : StorePatGlobalAddrOffOnly<i32, truncstorei16, STORE16_I32>; +def : StorePatGlobalAddrOffOnly<i64, truncstorei8, STORE8_I64>; +def : StorePatGlobalAddrOffOnly<i64, truncstorei16, STORE16_I64>; +def : StorePatGlobalAddrOffOnly<i64, truncstorei32, STORE32_I64>; + +// Current memory size. +defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags), + (outs), (ins i32imm:$flags), + [(set I32:$dst, + (int_wasm_memory_size (i32 imm:$flags)))], + "memory.size\t$dst, $flags", "memory.size\t$flags", + 0x3f>, + Requires<[HasAddr32]>; + +// Grow memory. 
+defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
+                         (outs), (ins i32imm:$flags),
+                         [(set I32:$dst,
+                               (int_wasm_memory_grow (i32 imm:$flags),
+                                                     I32:$delta))],
+                         "memory.grow\t$dst, $flags, $delta",
+                         "memory.grow\t$flags", 0x40>,
+                        Requires<[HasAddr32]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
new file mode 100644
index 000000000000..afe89de60b36
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -0,0 +1,25 @@
+// WebAssemblyInstrRef.td - WebAssembly reference type codegen --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly reference type operand codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm SELECT_EXNREF : I<(outs EXNREF:$dst),
+                       (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond),
+                       (outs), (ins),
+                       [(set EXNREF:$dst,
+                             (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))],
+                       "exnref.select\t$dst, $lhs, $rhs, $cond",
+                       "exnref.select", 0x1b>;
+
+def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
new file mode 100644
index 000000000000..fc5d73dac52e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -0,0 +1,887 @@
+// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly SIMD operand code-gen constructs.
+/// +//===----------------------------------------------------------------------===// + +// Instructions requiring HasSIMD128 and the simd128 prefix byte +multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r = "", + string asmstr_s = "", bits<32> simdop = -1> { + defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, + !or(0xfd00, !and(0xff, simdop))>, + Requires<[HasSIMD128]>; +} + +defm "" : ARGUMENT<V128, v16i8>; +defm "" : ARGUMENT<V128, v8i16>; +defm "" : ARGUMENT<V128, v4i32>; +defm "" : ARGUMENT<V128, v2i64>; +defm "" : ARGUMENT<V128, v4f32>; +defm "" : ARGUMENT<V128, v2f64>; + +// Constrained immediate argument types +foreach SIZE = [8, 16] in +def ImmI#SIZE : ImmLeaf<i32, + "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));" +>; +foreach SIZE = [2, 4, 8, 16, 32] in +def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; + +//===----------------------------------------------------------------------===// +// Load and store +//===----------------------------------------------------------------------===// + +// Load: v128.load +let mayLoad = 1, UseNamedOperandTable = 1 in +defm LOAD_V128 : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; + +// Def load and store patterns from WebAssemblyInstrMemory.td for vector types +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +def : LoadPatNoOffset<vec_t, load, LOAD_V128>; +def : LoadPatImmOff<vec_t, load, regPlusImm, LOAD_V128>; +def : LoadPatImmOff<vec_t, load, or_is_add, LOAD_V128>; +def : LoadPatOffsetOnly<vec_t, load, LOAD_V128>; +def : LoadPatGlobalAddrOffOnly<vec_t, load, LOAD_V128>; +} + +// vNxM.load_splat +multiclass SIMDLoadSplat<string vec, bits<32> simdop> { + let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in + defm LOAD_SPLAT_#vec : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + vec#".load_splat\t$dst, ${off}(${addr})$p2align", + vec#".load_splat\t$off$p2align", simdop>; +} + +defm "" : SIMDLoadSplat<"v8x16", 194>; +defm "" : SIMDLoadSplat<"v16x8", 195>; +defm "" : SIMDLoadSplat<"v32x4", 196>; +defm "" : SIMDLoadSplat<"v64x2", 197>; + +def wasm_load_splat_t : SDTypeProfile<1, 1, []>; +def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t>; + +foreach args = [["v16i8", "i32", "extloadi8"], ["v8i16", "i32", "extloadi16"], + ["v4i32", "i32", "load"], ["v2i64", "i64", "load"], + ["v4f32", "f32", "load"], ["v2f64", "f64", "load"]] in +def load_splat_#args[0] : + PatFrag<(ops node:$addr), (wasm_load_splat + (!cast<ValueType>(args[1]) (!cast<PatFrag>(args[2]) node:$addr)))>; + +let Predicates = [HasUnimplementedSIMD128] in +foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], + ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +def : LoadPatNoOffset<!cast<ValueType>(args[0]), + !cast<PatFrag>("load_splat_"#args[0]), + !cast<NI>("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff<!cast<ValueType>(args[0]), + !cast<PatFrag>("load_splat_"#args[0]), + regPlusImm, + !cast<NI>("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff<!cast<ValueType>(args[0]), + !cast<PatFrag>("load_splat_"#args[0]), + or_is_add, + !cast<NI>("LOAD_SPLAT_"#args[1])>; +def : 
LoadPatOffsetOnly<!cast<ValueType>(args[0]), + !cast<PatFrag>("load_splat_"#args[0]), + !cast<NI>("LOAD_SPLAT_"#args[1])>; +def : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]), + !cast<PatFrag>("load_splat_"#args[0]), + !cast<NI>("LOAD_SPLAT_"#args[1])>; +} + +// Load and extend +multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> { + let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in { + defm LOAD_EXTEND_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"_s\t$dst, ${off}(${addr})$p2align", + name#"_s\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"_u\t$dst, ${off}(${addr})$p2align", + name#"_u\t$off$p2align", !add(simdop, 1)>; + } +} + +defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 210>; +defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 212>; +defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 214>; + +let Predicates = [HasUnimplementedSIMD128] in +foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in +foreach exts = [["sextloadv", "_S"], + ["zextloadv", "_U"], + ["extloadv", "_U"]] in { +def : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]), + !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm, + !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add, + !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), + !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]), + !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +} + + +// Store: v128.store +let mayStore = 1, UseNamedOperandTable = 1 in +defm STORE_V128 : + SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 1>; + +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +// Def load and store patterns from WebAssemblyInstrMemory.td for vector types +def : StorePatNoOffset<vec_t, store, STORE_V128>; +def : StorePatImmOff<vec_t, store, regPlusImm, STORE_V128>; +def : StorePatImmOff<vec_t, store, or_is_add, STORE_V128>; +def : StorePatOffsetOnly<vec_t, store, STORE_V128>; +def : StorePatGlobalAddrOffOnly<vec_t, store, STORE_V128>; +} + +//===----------------------------------------------------------------------===// +// Constructing SIMD values +//===----------------------------------------------------------------------===// + +// Constant: v128.const +multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> { + let isMoveImm = 1, isReMaterializable = 1, + Predicates = [HasUnimplementedSIMD128] in + defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, + [(set V128:$dst, (vec_t pat))], + "v128.const\t$dst, "#args, + "v128.const\t"#args, 2>; +} + +defm "" : ConstVec<v16i8, + (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1, + vec_i8imm_op:$i2, vec_i8imm_op:$i3, + vec_i8imm_op:$i4, vec_i8imm_op:$i5, + vec_i8imm_op:$i6, vec_i8imm_op:$i7, + vec_i8imm_op:$i8, vec_i8imm_op:$i9, + vec_i8imm_op:$iA, vec_i8imm_op:$iB, + vec_i8imm_op:$iC, vec_i8imm_op:$iD, + 
vec_i8imm_op:$iE, vec_i8imm_op:$iF), + (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3, + ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7, + ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB, + ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF), + !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ", + "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>; +defm "" : ConstVec<v8i16, + (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1, + vec_i16imm_op:$i2, vec_i16imm_op:$i3, + vec_i16imm_op:$i4, vec_i16imm_op:$i5, + vec_i16imm_op:$i6, vec_i16imm_op:$i7), + (build_vector + ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3, + ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7), + "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">; +let IsCanonical = 1 in +defm "" : ConstVec<v4i32, + (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1, + vec_i32imm_op:$i2, vec_i32imm_op:$i3), + (build_vector (i32 imm:$i0), (i32 imm:$i1), + (i32 imm:$i2), (i32 imm:$i3)), + "$i0, $i1, $i2, $i3">; +defm "" : ConstVec<v2i64, + (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1), + (build_vector (i64 imm:$i0), (i64 imm:$i1)), + "$i0, $i1">; +defm "" : ConstVec<v4f32, + (ins f32imm_op:$i0, f32imm_op:$i1, + f32imm_op:$i2, f32imm_op:$i3), + (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1), + (f32 fpimm:$i2), (f32 fpimm:$i3)), + "$i0, $i1, $i2, $i3">; +defm "" : ConstVec<v2f64, + (ins f64imm_op:$i0, f64imm_op:$i1), + (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)), + "$i0, $i1">; + +// Shuffle lanes: shuffle +defm SHUFFLE : + SIMD_I<(outs V128:$dst), + (ins V128:$x, V128:$y, + vec_i8imm_op:$m0, vec_i8imm_op:$m1, + vec_i8imm_op:$m2, vec_i8imm_op:$m3, + vec_i8imm_op:$m4, vec_i8imm_op:$m5, + vec_i8imm_op:$m6, vec_i8imm_op:$m7, + vec_i8imm_op:$m8, vec_i8imm_op:$m9, + vec_i8imm_op:$mA, vec_i8imm_op:$mB, + vec_i8imm_op:$mC, vec_i8imm_op:$mD, + vec_i8imm_op:$mE, vec_i8imm_op:$mF), + (outs), + (ins + vec_i8imm_op:$m0, vec_i8imm_op:$m1, + vec_i8imm_op:$m2, vec_i8imm_op:$m3, + vec_i8imm_op:$m4, vec_i8imm_op:$m5, + vec_i8imm_op:$m6, vec_i8imm_op:$m7, + vec_i8imm_op:$m8, vec_i8imm_op:$m9, + vec_i8imm_op:$mA, vec_i8imm_op:$mB, + vec_i8imm_op:$mC, vec_i8imm_op:$mD, + vec_i8imm_op:$mE, vec_i8imm_op:$mF), + [], + "v8x16.shuffle\t$dst, $x, $y, "# + "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# + "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", + "v8x16.shuffle\t"# + "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# + "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", + 3>; + +// Shuffles after custom lowering +def wasm_shuffle_t : SDTypeProfile<1, 18, []>; +def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>; +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), + (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1), + (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3), + (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5), + (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7), + (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9), + (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB), + (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD), + (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))), + (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y), + (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1), + (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3), + (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5), + (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7), + (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9), + (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB), + (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD), + (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; +} + +// Swizzle lanes: v8x16.swizzle +def wasm_swizzle_t : SDTypeProfile<1, 2, []>; +def wasm_swizzle : 
SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; +let Predicates = [HasUnimplementedSIMD128] in +defm SWIZZLE : + SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + +def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), + (SWIZZLE V128:$src, V128:$mask)>; + +// Create vector with identical lanes: splat +def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>; +def splat4 : PatFrag<(ops node:$x), (build_vector + node:$x, node:$x, node:$x, node:$x)>; +def splat8 : PatFrag<(ops node:$x), (build_vector + node:$x, node:$x, node:$x, node:$x, + node:$x, node:$x, node:$x, node:$x)>; +def splat16 : PatFrag<(ops node:$x), (build_vector + node:$x, node:$x, node:$x, node:$x, + node:$x, node:$x, node:$x, node:$x, + node:$x, node:$x, node:$x, node:$x, + node:$x, node:$x, node:$x, node:$x)>; + +multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t, + PatFrag splat_pat, bits<32> simdop> { + // Prefer splats over v128.const for const splats (65 is lowest that works) + let AddedComplexity = 65 in + defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins), + [(set (vec_t V128:$dst), (splat_pat reg_t:$x))], + vec#".splat\t$dst, $x", vec#".splat", simdop>; +} + +defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>; +defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>; +defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>; +defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>; +defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>; +defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>; + +// scalar_to_vector leaves high lanes undefined, so can be a splat +class ScalarSplatPat<ValueType vec_t, ValueType lane_t, + WebAssemblyRegClass reg_t> : + Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))), + (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>; + +def : ScalarSplatPat<v16i8, i32, I32>; +def : ScalarSplatPat<v8i16, i32, I32>; +def : ScalarSplatPat<v4i32, i32, I32>; +def : ScalarSplatPat<v2i64, i64, I64>; +def : ScalarSplatPat<v4f32, f32, F32>; +def : ScalarSplatPat<v2f64, f64, F64>; + +//===----------------------------------------------------------------------===// +// Accessing lanes +//===----------------------------------------------------------------------===// + +// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u +multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t, + WebAssemblyRegClass reg_t, bits<32> simdop, + string suffix = "", SDNode extract = vector_extract> { + defm EXTRACT_LANE_#vec_t#suffix : + SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx), + (outs), (ins vec_i8imm_op:$idx), + [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))], + vec#".extract_lane"#suffix#"\t$dst, $vec, $idx", + vec#".extract_lane"#suffix#"\t$idx", simdop>; +} + +multiclass ExtractPat<ValueType lane_t, int mask> { + def _s : PatFrag<(ops node:$vec, node:$idx), + (i32 (sext_inreg + (i32 (vector_extract + node:$vec, + node:$idx + )), + lane_t + ))>; + def _u : PatFrag<(ops node:$vec, node:$idx), + (i32 (and + (i32 (vector_extract + node:$vec, + node:$idx + )), + (i32 mask) + ))>; +} + +defm extract_i8x16 : ExtractPat<i8, 0xff>; +defm extract_i16x8 : ExtractPat<i16, 0xffff>; + +multiclass ExtractLaneExtended<string sign, bits<32> baseInst> { + defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign, + !cast<PatFrag>("extract_i8x16"#sign)>; + defm "" : 
ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign, + !cast<PatFrag>("extract_i16x8"#sign)>; +} + +defm "" : ExtractLaneExtended<"_s", 5>; +let Predicates = [HasUnimplementedSIMD128] in +defm "" : ExtractLaneExtended<"_u", 6>; +defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>; +defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>; +defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>; +defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>; + +// It would be more conventional to use unsigned extracts, but v8 +// doesn't implement them yet +def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))), + (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>; +def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))), + (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>; + +// Lower undef lane indices to zero +def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)), + (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; +def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)), + (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; +def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)), + (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; +def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)), + (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; +def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8), + (EXTRACT_LANE_v16i8_s V128:$vec, 0)>; +def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16), + (EXTRACT_LANE_v8i16_s V128:$vec, 0)>; +def : Pat<(vector_extract (v4i32 V128:$vec), undef), + (EXTRACT_LANE_v4i32 V128:$vec, 0)>; +def : Pat<(vector_extract (v2i64 V128:$vec), undef), + (EXTRACT_LANE_v2i64 V128:$vec, 0)>; +def : Pat<(vector_extract (v4f32 V128:$vec), undef), + (EXTRACT_LANE_v4f32 V128:$vec, 0)>; +def : Pat<(vector_extract (v2f64 V128:$vec), undef), + (EXTRACT_LANE_v2f64 V128:$vec, 0)>; + +// Replace lane value: replace_lane +multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t, + WebAssemblyRegClass reg_t, ValueType lane_t, + bits<32> simdop> { + defm REPLACE_LANE_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x), + (outs), (ins vec_i8imm_op:$idx), + [(set V128:$dst, (vector_insert + (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))], + vec#".replace_lane\t$dst, $vec, $idx, $x", + vec#".replace_lane\t$idx", simdop>; +} + +defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>; +defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>; +defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>; +defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>; +defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>; +defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>; + +// Lower undef lane indices to zero +def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), + (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>; +def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef), + (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>; +def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef), + (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>; +def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef), + (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>; +def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef), + (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>; +def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef), + (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>; + 
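For reference, here is a scalar model of the two lane-read flavors matched above: extract_lane_s is matched as (sext_inreg (vector_extract ...), i8) and extract_lane_u as (and (vector_extract ...), 0xff), because those are exactly how sign- and zero-extending lane reads appear in the DAG. This is a minimal sketch in plain C++, not part of the LLVM sources; the function names are illustrative.

#include <cassert>
#include <cstdint>

// i8x16.extract_lane_s: the i8 lane is sign-extended to i32.
int32_t extract_lane_s_i8(const int8_t lanes[16], int idx) {
  assert(0 <= idx && idx < 16); // LaneIdx16 constrains the immediate
  return static_cast<int32_t>(lanes[idx]); // sign extension
}

// i8x16.extract_lane_u: the i8 lane is zero-extended to i32.
int32_t extract_lane_u_i8(const int8_t lanes[16], int idx) {
  assert(0 <= idx && idx < 16);
  return static_cast<int32_t>(lanes[idx]) & 0xff; // zero extension
}

int main() {
  int8_t v[16] = {-1}; // lane 0 holds bit pattern 0xff, remaining lanes are 0
  assert(extract_lane_s_i8(v, 0) == -1);   // sign-extended
  assert(extract_lane_u_i8(v, 0) == 0xff); // zero-extended
  return 0;
}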
+//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec, + string name, CondCode cond, bits<32> simdop> { + defm _#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), + [(set (out_t V128:$dst), + (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond) + )], + vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>; +} + +multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> { + defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>; + defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond, + !add(baseInst, 10)>; + defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond, + !add(baseInst, 20)>; +} + +multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> { + defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>; + defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond, + !add(baseInst, 6)>; +} + +// Equality: eq +let isCommutable = 1 in { +defm EQ : SIMDConditionInt<"eq", SETEQ, 24>; +defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>; +} // isCommutable = 1 + +// Non-equality: ne +let isCommutable = 1 in { +defm NE : SIMDConditionInt<"ne", SETNE, 25>; +defm NE : SIMDConditionFP<"ne", SETUNE, 65>; +} // isCommutable = 1 + +// Less than: lt_s / lt_u / lt +defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>; +defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>; +defm LT : SIMDConditionFP<"lt", SETOLT, 66>; + +// Greater than: gt_s / gt_u / gt +defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>; +defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>; +defm GT : SIMDConditionFP<"gt", SETOGT, 67>; + +// Less than or equal: le_s / le_u / le +defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>; +defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>; +defm LE : SIMDConditionFP<"le", SETOLE, 68>; + +// Greater than or equal: ge_s / ge_u / ge +defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>; +defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>; +defm GE : SIMDConditionFP<"ge", SETOGE, 69>; + +// Lower float comparisons that don't care about NaN to standard WebAssembly +// float comparisons. These instructions are generated with nnan and in the +// target-independent expansion of unordered comparisons and ordered ne. 
+foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32], + [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in +def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))), + (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>; + +foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64], + [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in +def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))), + (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>; + + +//===----------------------------------------------------------------------===// +// Bitwise operations +//===----------------------------------------------------------------------===// + +multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name, + bits<32> simdop> { + defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), + [(set (vec_t V128:$dst), + (node (vec_t V128:$lhs), (vec_t V128:$rhs)) + )], + vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, + simdop>; +} + +multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> { + defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>; + defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>; + defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>; + defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>; +} + +multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name, + bits<32> simdop> { + defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), + [(set (vec_t V128:$dst), + (vec_t (node (vec_t V128:$vec))) + )], + vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>; +} + +// Bitwise logic: v128.not +foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in +defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>; + +// Bitwise logic: v128.and / v128.or / v128.xor +let isCommutable = 1 in { +defm AND : SIMDBitwise<and, "and", 77>; +defm OR : SIMDBitwise<or, "or", 78>; +defm XOR : SIMDBitwise<xor, "xor", 79>; +} // isCommutable = 1 + +// Bitwise logic: v128.andnot +def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; +let Predicates = [HasUnimplementedSIMD128] in +defm ANDNOT : SIMDBitwise<andnot, "andnot", 216>; + +// Bitwise select: v128.bitselect +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + defm BITSELECT_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), + [(set (vec_t V128:$dst), + (vec_t (int_wasm_bitselect + (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) + )) + )], + "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>; + +// Bitselect is equivalent to (c & v1) | (~c & v2) +foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in + def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)), + (and (vnot V128:$c), (vec_t V128:$v2)))), + (!cast<Instruction>("BITSELECT_"#vec_t) + V128:$v1, V128:$v2, V128:$c)>; + +//===----------------------------------------------------------------------===// +// Integer unary arithmetic +//===----------------------------------------------------------------------===// + +multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>; + defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>; + defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>; + defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>; +} + +multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name, + bits<32> simdop> { + 
defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), + [(set I32:$dst, (i32 (op (vec_t V128:$vec))))], + vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>; +} + +multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> { + defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>; + defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>; + defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>; + defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>; +} + +// Integer vector negation +def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; + +// Integer negation: neg +defm NEG : SIMDUnaryInt<ivneg, "neg", 81>; + +// Any lane true: any_true +defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>; + +// All lanes true: all_true +defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>; + +// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1 +// can be folded out +foreach reduction = + [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in +foreach ty = [v16i8, v8i16, v4i32, v2i64] in { +def : Pat<(i32 (and + (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), + (i32 1) + )), + (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; +def : Pat<(i32 (setne + (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), + (i32 0) + )), + (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; +def : Pat<(i32 (seteq + (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))), + (i32 1) + )), + (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>; +} + +//===----------------------------------------------------------------------===// +// Bit shifts +//===----------------------------------------------------------------------===// + +multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec, + string name, bits<32> simdop> { + defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), + (outs), (ins), + [(set (vec_t V128:$dst), + (node V128:$vec, (vec_t shift_vec)))], + vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>; +} + +multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>; + defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name, + !add(baseInst, 17)>; + defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name, + !add(baseInst, 34)>; + defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))), + name, !add(baseInst, 51)>; +} + +// Left shift by scalar: shl +defm SHL : SIMDShiftInt<shl, "shl", 84>; + +// Right shift by scalar: shr_s / shr_u +defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>; +defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>; + +// Truncate i64 shift operands to i32s, except if they are already i32s +foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in { +def : Pat<(v2i64 (shifts[0] + (v2i64 V128:$vec), + (v2i64 (splat2 (i64 (sext I32:$x)))) + )), + (v2i64 (shifts[1] (v2i64 V128:$vec), (i32 I32:$x)))>; +def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))), + (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>; +} + +// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping +def wasm_shift_t : SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>] +>; +def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>; +def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>; +def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", 
wasm_shift_t>; +foreach shifts = [[wasm_shl, SHL_v2i64], + [wasm_shr_s, SHR_S_v2i64], + [wasm_shr_u, SHR_U_v2i64]] in +def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)), + (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>; + +//===----------------------------------------------------------------------===// +// Integer binary arithmetic +//===----------------------------------------------------------------------===// + +multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>; + defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>; +} + +multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDBinaryIntSmall<node, name, baseInst>; + defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>; +} + +multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>; + defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>; +} + +// Integer addition: add / add_saturate_s / add_saturate_u +let isCommutable = 1 in { +defm ADD : SIMDBinaryInt<add, "add", 87>; +defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>; +defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>; +} // isCommutable = 1 + +// Integer subtraction: sub / sub_saturate_s / sub_saturate_u +defm SUB : SIMDBinaryInt<sub, "sub", 90>; +defm SUB_SAT_S : + SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>; +defm SUB_SAT_U : + SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>; + +// Integer multiplication: mul +defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>; + +//===----------------------------------------------------------------------===// +// Floating-point unary arithmetic +//===----------------------------------------------------------------------===// + +multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>; + defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>; +} + +// Absolute value: abs +defm ABS : SIMDUnaryFP<fabs, "abs", 149>; + +// Negation: neg +defm NEG : SIMDUnaryFP<fneg, "neg", 150>; + +// Square root: sqrt +let Predicates = [HasUnimplementedSIMD128] in +defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>; + +//===----------------------------------------------------------------------===// +// Floating-point binary arithmetic +//===----------------------------------------------------------------------===// + +multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> { + defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>; + defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>; +} + +// Addition: add +let isCommutable = 1 in +defm ADD : SIMDBinaryFP<fadd, "add", 154>; + +// Subtraction: sub +defm SUB : SIMDBinaryFP<fsub, "sub", 155>; + +// Multiplication: mul +let isCommutable = 1 in +defm MUL : SIMDBinaryFP<fmul, "mul", 156>; + +// Division: div +let Predicates = [HasUnimplementedSIMD128] in +defm DIV : SIMDBinaryFP<fdiv, "div", 157>; + +// NaN-propagating minimum: min +defm MIN : SIMDBinaryFP<fminimum, "min", 158>; + +// NaN-propagating maximum: max +defm MAX : SIMDBinaryFP<fmaximum, "max", 159>; + +//===----------------------------------------------------------------------===// +// Conversions +//===----------------------------------------------------------------------===// + +multiclass SIMDConvert<ValueType vec_t, 
ValueType arg_t, SDNode op, + string name, bits<32> simdop> { + defm op#_#vec_t#_#arg_t : + SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))], + name#"\t$dst, $vec", name, simdop>; +} + +// Integer to floating point: convert +defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>; +defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>; +defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>; +defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>; + +// Floating point to integer with saturation: trunc_sat +defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>; +defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>; +defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>; +defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>; + +// Widening operations +multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg, + bits<32> baseInst> { + defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_signed, + vec#".widen_low_"#arg#"_s", baseInst>; + defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_signed, + vec#".widen_high_"#arg#"_s", !add(baseInst, 1)>; + defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_low_unsigned, + vec#".widen_low_"#arg#"_u", !add(baseInst, 2)>; + defm "" : SIMDConvert<vec_t, arg_t, int_wasm_widen_high_unsigned, + vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>; +} + +defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 202>; +defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 206>; + +// Narrowing operations +multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg, + bits<32> baseInst> { + defm NARROW_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s", + baseInst>; + defm NARROW_U_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u", + !add(baseInst, 1)>; +} + +defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 198>; +defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 200>; + +// Lower llvm.wasm.trunc.saturate.* to saturating instructions +def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), + (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; +def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), + (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>; +def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))), + (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>; +def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))), + (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>; + +// Bitcasts are nops +// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types +foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in +foreach t2 = !foldl( + []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)), + acc, !listconcat(acc, [cur]) + ) +) in +def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; + 
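The fp_to_sint/fp_to_uint mappings above rely on the saturating semantics of the wasm trunc_sat instructions: NaN converts to 0 and out-of-range values clamp rather than trap. A minimal scalar sketch of the signed f32 case follows, in plain C++ rather than LLVM code; the function name is illustrative.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Scalar model of one lane of i32x4.trunc_sat_f32x4_s: truncate toward zero,
// mapping NaN to 0 and clamping out-of-range inputs instead of trapping.
int32_t trunc_sat_f32_s(float x) {
  if (std::isnan(x))
    return 0;
  if (x <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min();
  if (x >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max();
  return static_cast<int32_t>(x); // in range: plain truncation
}

int main() {
  assert(trunc_sat_f32_s(1.9f) == 1);
  assert(trunc_sat_f32_s(-1.9f) == -1);
  assert(trunc_sat_f32_s(std::nanf("")) == 0);
  assert(trunc_sat_f32_s(1e30f) == std::numeric_limits<int32_t>::max());
  return 0;
}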
+//===----------------------------------------------------------------------===//
+// Quasi-Fused Multiply-Add and Subtract (QFMA/QFMS)
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
+  defm QFMA_#vec_t :
+    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+           (outs), (ins),
+           [(set (vec_t V128:$dst),
+             (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+           vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>;
+  defm QFMS_#vec_t :
+    SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
+           (outs), (ins),
+           [(set (vec_t V128:$dst),
+             (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
+           vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
+}
+
+defm "" : SIMDQFM<v4f32, "f32x4", 0x98>;
+defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
new file mode 100644
index 000000000000..75d04252cbe9
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -0,0 +1,391 @@
+//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Does various transformations for exception handling.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-late-eh-prepare"
+
+namespace {
+class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly Late Prepare Exception";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool addCatches(MachineFunction &MF);
+  bool replaceFuncletReturns(MachineFunction &MF);
+  bool removeUnnecessaryUnreachables(MachineFunction &MF);
+  bool addExceptionExtraction(MachineFunction &MF);
+  bool restoreStackPointer(MachineFunction &MF);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyLateEHPrepare::ID = 0;
+INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
+                "WebAssembly Late Exception Preparation", false, false)
+
+FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
+  return new WebAssemblyLateEHPrepare();
+}
+
+// Returns the nearest EH pad that dominates this instruction. This does not
+// use dominator analysis; it just does a simple worklist search over the
+// predecessors until arriving at an EH pad. This assumes valid EH scopes, so
+// the first EH pad it arrives at along all possible search paths should be
+// the same.
+// Returns nullptr in case it does not find any EH pad in the search, or finds
+// multiple different EH pads.
+static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
+  MachineFunction *MF = MI->getParent()->getParent();
+  SmallVector<MachineBasicBlock *, 2> WL;
+  SmallPtrSet<MachineBasicBlock *, 2> Visited;
+  WL.push_back(MI->getParent());
+  MachineBasicBlock *EHPad = nullptr;
+  while (!WL.empty()) {
+    MachineBasicBlock *MBB = WL.pop_back_val();
+    if (Visited.count(MBB))
+      continue;
+    Visited.insert(MBB);
+    if (MBB->isEHPad()) {
+      if (EHPad && EHPad != MBB)
+        return nullptr;
+      EHPad = MBB;
+      continue;
+    }
+    if (MBB == &MF->front())
+      return nullptr;
+    WL.append(MBB->pred_begin(), MBB->pred_end());
+  }
+  return EHPad;
+}
+
+// Erase each of the specified BBs if it has no remaining predecessors, and
+// also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &MBBs) {
+  SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
+  while (!WL.empty()) {
+    MachineBasicBlock *MBB = WL.pop_back_val();
+    if (!MBB->pred_empty())
+      continue;
+    SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
+                                              MBB->succ_end());
+    WL.append(MBB->succ_begin(), MBB->succ_end());
+    for (auto *Succ : Succs)
+      MBB->removeSuccessor(Succ);
+    MBB->eraseFromParent();
+  }
+}
+
+bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
+                       "********** Function: "
+                    << MF.getName() << '\n');
+
+  if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
+      ExceptionHandling::Wasm)
+    return false;
+
+  bool Changed = false;
+  if (MF.getFunction().hasPersonalityFn()) {
+    Changed |= addCatches(MF);
+    Changed |= replaceFuncletReturns(MF);
+  }
+  Changed |= removeUnnecessaryUnreachables(MF);
+  if (MF.getFunction().hasPersonalityFn()) {
+    Changed |= addExceptionExtraction(MF);
+    Changed |= restoreStackPointer(MF);
+  }
+  return Changed;
+}
+
+// Add a catch instruction to the beginning of catchpads and cleanuppads.
+bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
+  bool Changed = false;
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (auto &MBB : MF) {
+    if (MBB.isEHPad()) {
+      Changed = true;
+      auto InsertPos = MBB.begin();
+      if (InsertPos->isEHLabel()) // EH pad starts with an EH label
+        ++InsertPos;
+      Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+      BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
+              TII.get(WebAssembly::CATCH), DstReg);
+    }
+  }
+  return Changed;
+}
+
+bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
+  bool Changed = false;
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  for (auto &MBB : MF) {
+    auto Pos = MBB.getFirstTerminator();
+    if (Pos == MBB.end())
+      continue;
+    MachineInstr *TI = &*Pos;
+
+    switch (TI->getOpcode()) {
+    case WebAssembly::CATCHRET: {
+      // Replace a catchret with a branch
+      MachineBasicBlock *TBB = TI->getOperand(0).getMBB();
+      if (!MBB.isLayoutSuccessor(TBB))
+        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::BR))
+            .addMBB(TBB);
+      TI->eraseFromParent();
+      Changed = true;
+      break;
+    }
+    case WebAssembly::CLEANUPRET:
+    case WebAssembly::RETHROW_IN_CATCH: {
+      // Replace a cleanupret/rethrow_in_catch with a rethrow
+      auto *EHPad = getMatchingEHPad(TI);
+      auto CatchPos = EHPad->begin();
+      if (CatchPos->isEHLabel()) // EH pad starts with an EH label
+        ++CatchPos;
+      MachineInstr *Catch = &*CatchPos;
+      Register ExnReg = Catch->getOperand(0).getReg();
+      BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
+          .addReg(ExnReg);
+      TI->eraseFromParent();
+      Changed = true;
+      break;
+    }
+    }
+  }
+  return Changed;
+}
+
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+    MachineFunction &MF) {
+  bool Changed = false;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.getOpcode() != WebAssembly::THROW &&
+          MI.getOpcode() != WebAssembly::RETHROW)
+        continue;
+      Changed = true;
+
+      // The instruction after the throw should be an unreachable or a branch to
+      // another BB that should eventually lead to an unreachable. Delete it
+      // because throw itself is a terminator, and also delete its successors,
+      // if any.
+      MBB.erase(std::next(MI.getIterator()), MBB.end());
+      SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+                                                MBB.succ_end());
+      for (auto *Succ : Succs)
+        if (!Succ->isEHPad())
+          MBB.removeSuccessor(Succ);
+      eraseDeadBBsAndChildren(Succs);
+    }
+  }
+
+  return Changed;
+}
+
+// Wasm uses the 'br_on_exn' instruction to check the tag of an exception. It
+// takes the exnref object returned by 'catch', and branches to the destination
+// if it matches a given tag. We currently use the __cpp_exception symbol to
+// represent the tag for all C++ exceptions.
+//
+// block $l (result i32)
+//   ...
+//   ;; exnref $e is on the stack at this point
+//   br_on_exn $l $e   ;; branch to $l with $e's arguments
+//   ...
+// end
+// ;; Here we expect the extracted values to be on top of the wasm value stack
+// ... Handle exception using values ...
+//
+// br_on_exn takes an exnref object and branches if it matches the given tag.
+// There can be multiple br_on_exn instructions if we want to match for another
+// tag, but for now we only test for the __cpp_exception tag, and if it does
+// not match, i.e., it is a foreign exception, we rethrow it.
+// +// In the destination BB that's the target of br_on_exn, extracted exception +// values (in C++'s case a single i32, which represents an exception pointer) +// are placed on top of the wasm stack. Because we can't model wasm stack in +// LLVM instruction, we use 'extract_exception' pseudo instruction to retrieve +// it. The pseudo instruction will be deleted later. +bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto *EHInfo = MF.getWasmEHFuncInfo(); + SmallVector<MachineInstr *, 16> ExtractInstrs; + SmallVector<MachineInstr *, 8> ToDelete; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) { + if (MI.getOperand(0).isDead()) + ToDelete.push_back(&MI); + else + ExtractInstrs.push_back(&MI); + } + } + } + bool Changed = !ToDelete.empty() || !ExtractInstrs.empty(); + for (auto *MI : ToDelete) + MI->eraseFromParent(); + if (ExtractInstrs.empty()) + return Changed; + + // Find terminate pads. + SmallSet<MachineBasicBlock *, 8> TerminatePads; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isCall()) { + const MachineOperand &CalleeOp = MI.getOperand(0); + if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() == + WebAssembly::ClangCallTerminateFn) + TerminatePads.insert(getMatchingEHPad(&MI)); + } + } + } + + for (auto *Extract : ExtractInstrs) { + MachineBasicBlock *EHPad = getMatchingEHPad(Extract); + assert(EHPad && "No matching EH pad for extract_exception"); + auto CatchPos = EHPad->begin(); + if (CatchPos->isEHLabel()) // EH pad starts with an EH label + ++CatchPos; + MachineInstr *Catch = &*CatchPos; + + if (Catch->getNextNode() != Extract) + EHPad->insert(Catch->getNextNode(), Extract->removeFromParent()); + + // - Before: + // ehpad: + // %exnref:exnref = catch + // %exn:i32 = extract_exception + // ... use exn ... + // + // - After: + // ehpad: + // %exnref:exnref = catch + // br_on_exn %thenbb, $__cpp_exception, %exnref + // br %elsebb + // elsebb: + // rethrow + // thenbb: + // %exn:i32 = extract_exception + // ... use exn ... + Register ExnReg = Catch->getOperand(0).getReg(); + auto *ThenMBB = MF.CreateMachineBasicBlock(); + auto *ElseMBB = MF.CreateMachineBasicBlock(); + MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB); + MF.insert(std::next(MachineFunction::iterator(ElseMBB)), ThenMBB); + ThenMBB->splice(ThenMBB->end(), EHPad, Extract, EHPad->end()); + ThenMBB->transferSuccessors(EHPad); + EHPad->addSuccessor(ThenMBB); + EHPad->addSuccessor(ElseMBB); + + DebugLoc DL = Extract->getDebugLoc(); + const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception"); + BuildMI(EHPad, DL, TII.get(WebAssembly::BR_ON_EXN)) + .addMBB(ThenMBB) + .addExternalSymbol(CPPExnSymbol) + .addReg(ExnReg); + BuildMI(EHPad, DL, TII.get(WebAssembly::BR)).addMBB(ElseMBB); + + // When this is a terminate pad with __clang_call_terminate() call, we don't + // rethrow it anymore and call __clang_call_terminate() with a nullptr + // argument, which will call std::terminate(). 
+ // + // - Before: + // ehpad: + // %exnref:exnref = catch + // %exn:i32 = extract_exception + // call @__clang_call_terminate(%exn) + // unreachable + // + // - After: + // ehpad: + // %exnref:exnref = catch + // br_on_exn %thenbb, $__cpp_exception, %exnref + // br %elsebb + // elsebb: + // call @__clang_call_terminate(0) + // unreachable + // thenbb: + // %exn:i32 = extract_exception + // call @__clang_call_terminate(%exn) + // unreachable + if (TerminatePads.count(EHPad)) { + Function *ClangCallTerminateFn = + MF.getFunction().getParent()->getFunction( + WebAssembly::ClangCallTerminateFn); + assert(ClangCallTerminateFn && + "There is no __clang_call_terminate() function"); + Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0); + BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID)) + .addGlobalAddress(ClangCallTerminateFn) + .addReg(Reg); + BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE)); + + } else { + BuildMI(ElseMBB, DL, TII.get(WebAssembly::RETHROW)).addReg(ExnReg); + if (EHInfo->hasEHPadUnwindDest(EHPad)) + ElseMBB->addSuccessor(EHInfo->getEHPadUnwindDest(EHPad)); + } + } + + return true; +} + +// After the stack is unwound due to a thrown exception, the __stack_pointer +// global can point to an invalid address. This inserts instructions that +// restore __stack_pointer global. +bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { + const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>( + MF.getSubtarget().getFrameLowering()); + if (!FrameLowering->needsPrologForEH(MF)) + return false; + bool Changed = false; + + for (auto &MBB : MF) { + if (!MBB.isEHPad()) + continue; + Changed = true; + + // Insert __stack_pointer restoring instructions at the beginning of each EH + // pad, after the catch instruction. Here it is safe to assume that SP32 + // holds the latest value of __stack_pointer, because the only exception for + // this case is when a function uses the red zone, but that only happens + // with leaf functions, and we don't restore __stack_pointer in leaf + // functions anyway. + auto InsertPos = MBB.begin(); + if (InsertPos->isEHLabel()) // EH pad starts with an EH label + ++InsertPos; + if (InsertPos->getOpcode() == WebAssembly::CATCH) + ++InsertPos; + FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos, + MBB.begin()->getDebugLoc()); + } + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..4314aa611549 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,210 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. This pass allows LLVM to use it, for now. 
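+///
+/// For example (an illustrative sketch only, not literal pass output):
+///
+///   br_unless $label, $cond
+///
+/// becomes either an in-place inversion of the comparison that defines
+/// $cond, or
+///
+///   $tmp = i32.eqz $cond
+///   br_if $label, $tmp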
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +INITIALIZE_PASS(WebAssemblyLowerBrUnless, DEBUG_TYPE, + "Lowers br_unless into inverted br_if", false, false) + +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end();) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + Register Cond = MI->getOperand(1).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. 
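+      // A stackified condition has exactly one def, so it is safe to rewrite
+      // that def; inverting the comparison avoids materializing an extra
+      // eqz instruction.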
+      if (MFI.isVRegStackified(Cond)) {
+        assert(MRI.hasOneDef(Cond));
+        MachineInstr *Def = MRI.getVRegDef(Cond);
+        switch (Def->getOpcode()) {
+          using namespace WebAssembly;
+        case EQ_I32:   Def->setDesc(TII.get(NE_I32));   Inverted = true; break;
+        case NE_I32:   Def->setDesc(TII.get(EQ_I32));   Inverted = true; break;
+        case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
+        case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
+        case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
+        case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
+        case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
+        case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
+        case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
+        case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
+        case EQ_I64:   Def->setDesc(TII.get(NE_I64));   Inverted = true; break;
+        case NE_I64:   Def->setDesc(TII.get(EQ_I64));   Inverted = true; break;
+        case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
+        case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
+        case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
+        case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
+        case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
+        case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
+        case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
+        case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
+        case EQ_F32:   Def->setDesc(TII.get(NE_F32));   Inverted = true; break;
+        case NE_F32:   Def->setDesc(TII.get(EQ_F32));   Inverted = true; break;
+        case EQ_F64:   Def->setDesc(TII.get(NE_F64));   Inverted = true; break;
+        case NE_F64:   Def->setDesc(TII.get(EQ_F64));   Inverted = true; break;
+        case EQZ_I32:
+          // Invert an eqz by replacing it with its operand.
+          Cond = Def->getOperand(1).getReg();
+          Def->eraseFromParent();
+          Inverted = true;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // If we weren't able to invert the condition in place, insert an
+      // instruction to invert it.
+      if (!Inverted) {
+        Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+        BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
+            .addReg(Cond);
+        MFI.stackifyVReg(Tmp);
+        Cond = Tmp;
+        Inverted = true;
+      }
+
+      // The br_unless condition has now been inverted. Insert a br_if and
+      // delete the br_unless.
+      assert(Inverted);
+      BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
+          .add(MI->getOperand(0))
+          .addReg(Cond);
+      MBB.erase(MI);
+    }
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
new file mode 100644
index 000000000000..1cf397dd060b
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -0,0 +1,1111 @@
+//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file lowers exception-related instructions and setjmp/longjmp
+/// function calls in order to use Emscripten's JavaScript try and catch
+/// mechanism.
+///
+/// To handle exceptions and setjmp/longjmps, this scheme relies on
+/// JavaScript's try and catch syntax and relevant exception-related libraries
+/// implemented in JavaScript glue code that will be produced by Emscripten.
+/// This is similar to the current Emscripten asm.js exception handling in
+/// fastcomp. For fastcomp's EH / SjLj scheme, see these files in the fastcomp
+/// LLVM branch:
+/// (Location: https://github.com/kripken/emscripten-fastcomp)
+/// lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp
+/// lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp
+/// lib/Target/JSBackend/JSBackend.cpp
+/// lib/Target/JSBackend/CallHandlers.h
+///
+/// * Exception handling
+/// This pass lowers invokes and landingpads into library functions in JS glue
+/// code. Invokes are lowered into function wrappers called invoke wrappers
+/// that exist on the JS side, which wrap the original function call with a JS
+/// try-catch. If an exception occurs, the cxa_throw() function on the JS side
+/// sets some variables (see below) so we can check whether an exception
+/// occurred in wasm code and handle it appropriately.
+///
+/// * Setjmp-longjmp handling
+/// This pass lowers setjmp to a reasonably performant approach for
+/// Emscripten. The idea is that each block with a setjmp is broken up into
+/// two parts: the part containing the setjmp and the part right after the
+/// setjmp. The latter part is either reached from the setjmp, or later from a
+/// longjmp. To handle the longjmp, all calls that might longjmp are also
+/// called using invoke wrappers and thus JS try-catch. The JS longjmp()
+/// function also sets some variables so we can check whether a longjmp
+/// occurred in wasm code. Each block with a function call that might longjmp
+/// is also split up after the longjmp call. After the longjmp call, we check
+/// whether a longjmp occurred, and if it did, which setjmp it corresponds to,
+/// and jump to the right post-setjmp block.
+/// We assume setjmp-longjmp handling always runs after EH handling, which
+/// means we don't expect any exception-related instructions when SjLj runs.
+/// FIXME Currently this scheme does not support indirect calls of setjmp,
+/// because of the limitation of the scheme itself. fastcomp does not support
+/// it either.
+///
+/// In detail, this pass does the following things:
+///
+/// 1) Assumes the existence of the global variables __THREW__ and
+///    __threwValue. They will be set in invoke wrappers in JS glue code. For
+///    what invoke wrappers are, refer to 3). These variables are used for
+///    both exceptions and setjmp/longjmps.
+///    __THREW__ indicates whether an exception or a longjmp occurred or not.
+///    0 means nothing occurred, 1 means an exception occurred, and other
+///    numbers mean a longjmp occurred. In the case of a longjmp, the
+///    __threwValue variable indicates the setjmp buffer that the longjmp
+///    corresponds to.
+///
+/// * Exception handling
+///
+/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0
+///    functions at link time. The global variables in 1) will exist in wasm
+///    address space, but their values should be set in JS code, so these
+///    functions serve as interfaces to JS glue code.
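+///    As an illustration, the C-level signatures assumed here for these
+///    interfaces would be:
+///      void setThrew(int threw, int value);
+///      void setTempRet0(int value);
+///      int getTempRet0(void);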
+///    These functions are equivalent to the following JS functions, which
+///    actually exist in the asm.js version of the JS library:
+///
+///    function setThrew(threw, value) {
+///      if (__THREW__ == 0) {
+///        __THREW__ = threw;
+///        __threwValue = value;
+///      }
+///    }
+///
+///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
+///
+///    In exception handling, getTempRet0 indicates the type of an exception
+///    caught, and in setjmp/longjmp, it means the second argument to the
+///    longjmp function.
+///
+/// 3) Lower
+///      invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
+///    into
+///      __THREW__ = 0;
+///      call @__invoke_SIG(func, arg1, arg2)
+///      %__THREW__.val = __THREW__;
+///      __THREW__ = 0;
+///      if (%__THREW__.val == 1)
+///        goto %lpad
+///      else
+///        goto %invoke.cont
+///    SIG is a mangled string generated based on the LLVM IR-level function
+///    signature. After LLVM IR types are lowered to the target wasm types,
+///    the names for these wrappers will change based on wasm types as well,
+///    as in invoke_vi (a function that takes an int and returns void). The
+///    bodies of these wrappers will be generated in JS glue code, and inside
+///    those wrappers we use a JS try-catch to generate the actual exception
+///    effects. The wrapper also calls the original callee function. An
+///    example wrapper in JS code would look like this:
+///    function invoke_vi(index,a1) {
+///      try {
+///        Module["dynCall_vi"](index,a1); // This calls original callee
+///      } catch(e) {
+///        if (typeof e !== 'number' && e !== 'longjmp') throw e;
+///        asm["setThrew"](1, 0); // setThrew is called here
+///      }
+///    }
+///    If an exception is thrown, __THREW__ will be set to true in a wrapper,
+///    so we can jump to the right BB based on this value.
+///
+/// 4) Lower
+///      %val = landingpad catch c1 catch c2 catch c3 ...
+///      ... use %val ...
+///    into
+///      %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
+///      %val = {%fmc, getTempRet0()}
+///      ... use %val ...
+///    Here N is a number calculated based on the number of clauses.
+///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
+///
+/// 5) Lower
+///      resume {%a, %b}
+///    into
+///      call @__resumeException(%a)
+///    where __resumeException() is a function in JS glue code.
+///
+/// 6) Lower
+///      call @llvm.eh.typeid.for(type) (intrinsic)
+///    into
+///      call @llvm_eh_typeid_for(type)
+///    The llvm_eh_typeid_for function will be generated in JS glue code.
+///
+/// * Setjmp / Longjmp handling
+///
+/// In case calls to longjmp() exist:
+///
+/// 1) Lower
+///      longjmp(buf, value)
+///    into
+///      emscripten_longjmp_jmpbuf(buf, value)
+///    emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+///
+/// In case calls to setjmp() exist:
+///
+/// 2) In the function entry that calls setjmp, initialize setjmpTable and
+///    setjmpTableSize as follows:
+///      setjmpTableSize = 4;
+///      setjmpTable = (int *) malloc(40);
+///      setjmpTable[0] = 0;
+///    setjmpTable and setjmpTableSize are used in the saveSetjmp() function
+///    in JS code.
+///
+/// 3) Lower
+///      setjmp(buf)
+///    into
+///      setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+///      setjmpTableSize = getTempRet0();
+///    For each dynamic setjmp call, setjmpTable stores its ID (a number which
+///    is incrementally assigned from 0) and its label (a unique number that
+///    represents each callsite of setjmp).
+///    When we need more entries in setjmpTable, it is reallocated in
+///    saveSetjmp() in JS code, which returns the new table address and
+///    assigns the new table size via setTempRet0(). saveSetjmp also stores
+///    the setjmp's ID into the buffer buf. A BB with setjmp is split into two
+///    after the setjmp call in order to make the post-setjmp BB the possible
+///    destination of a longjmp BB.
+///
+/// 4) Lower every call that might longjmp into
+///      __THREW__ = 0;
+///      call @__invoke_SIG(func, arg1, arg2)
+///      %__THREW__.val = __THREW__;
+///      __THREW__ = 0;
+///      if (%__THREW__.val != 0 & __threwValue != 0) {
+///        %label = testSetjmp(mem[%__THREW__.val], setjmpTable,
+///                            setjmpTableSize);
+///        if (%label == 0)
+///          emscripten_longjmp(%__THREW__.val, __threwValue);
+///        setTempRet0(__threwValue);
+///      } else {
+///        %label = -1;
+///      }
+///      longjmp_result = getTempRet0();
+///      switch label {
+///        label 1: goto post-setjmp BB 1
+///        label 2: goto post-setjmp BB 2
+///        ...
+///        default: goto the split next BB
+///      }
+///    testSetjmp examines setjmpTable to see if there is a matching setjmp
+///    call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
+///    will be the address of the matching jmp_buf buffer and __threwValue
+///    will be the second argument to longjmp. mem[__THREW__.val] is a setjmp
+///    ID that is stored in saveSetjmp. testSetjmp returns a setjmp label, a
+///    unique ID for each setjmp callsite. Label 0 means this longjmp buffer
+///    does not correspond to one of the setjmp callsites in this function, so
+///    in this case we just chain the longjmp to the caller. (Here we call
+///    emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
+///    emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
+///    emscripten_longjmp takes an int. Both of them will eventually be
+///    lowered to emscripten_longjmp in s2wasm, but here we need two
+///    signatures - we can't translate an int value to a jmp_buf.)
+///    Label -1 means no longjmp occurred. Otherwise we jump to the right
+///    post-setjmp BB based on the label.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
+
+static cl::list<std::string>
+    EHWhitelist("emscripten-cxx-exceptions-whitelist",
+                cl::desc("The list of function names in which Emscripten-style "
+                         "exception handling is enabled (see emscripten "
+                         "EMSCRIPTEN_CATCHING_WHITELIST options)"),
+                cl::CommaSeparated);
+
+namespace {
+class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
+  bool EnableEH;   // Enable exception handling
+  bool EnableSjLj; // Enable setjmp/longjmp handling
+
+  GlobalVariable *ThrewGV = nullptr;
+  GlobalVariable *ThrewValueGV = nullptr;
+  Function *GetTempRet0Func = nullptr;
+  Function *SetTempRet0Func = nullptr;
+  Function *ResumeF = nullptr;
+  Function *EHTypeIDF = nullptr;
+  Function *EmLongjmpF = nullptr;
+  Function *EmLongjmpJmpbufF = nullptr;
+  Function *SaveSetjmpF = nullptr;
+  Function *TestSetjmpF = nullptr;
+
+  // __cxa_find_matching_catch_N functions.
+  // Indexed by the number of clauses in an original landingpad instruction.
+  DenseMap<int, Function *> FindMatchingCatches;
+  // Map of <function signature string, invoke_ wrappers>
+  StringMap<Function *> InvokeWrappers;
+  // Set of whitelisted function names for exception handling
+  std::set<std::string> EHWhitelistSet;
+
+  StringRef getPassName() const override {
+    return "WebAssembly Lower Emscripten Exceptions";
+  }
+
+  bool runEHOnFunction(Function &F);
+  bool runSjLjOnFunction(Function &F);
+  Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
+
+  template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI);
+  void wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw,
+                      Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label,
+                      Value *&LongjmpResult, BasicBlock *&EndBB);
+  template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI);
+
+  bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
+  bool canLongjmp(Module &M, const Value *Callee) const;
+  bool isEmAsmCall(Module &M, const Value *Callee) const;
+
+  void rebuildSSA(Function &F);
+
+public:
+  static char ID;
+
+  WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
+      : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
+    EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
+  }
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+  }
+};
+} // End anonymous namespace
+
+char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
+INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
+                "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
+                false, false)
+
+ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
+                                                         bool EnableSjLj) {
+  return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj);
+}
+
+static bool canThrow(const Value *V) {
+  if (const auto *F = dyn_cast<const Function>(V)) {
+    // Intrinsics cannot throw
+    if (F->isIntrinsic())
+      return false;
+    StringRef Name = F->getName();
+    // Leave setjmp and longjmp (mostly) alone; we process them properly later.
+    if (Name == "setjmp" || Name == "longjmp")
+      return false;
+    return !F->doesNotThrow();
+  }
+  // Not a function, so an indirect call - it can throw, and we can't tell.
+  return true;
+}
+
+// Get a global variable with the given name. If it doesn't exist, declare it,
+// which will generate an import and assume that it will exist at link time.
+static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
+                                            const char *Name) {
+
+  auto *GV =
+      dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
+  if (!GV)
+    report_fatal_error(Twine("unable to create global: ") + Name);
+
+  return GV;
+}
+
+// Simple function name mangler.
+// This function simply takes LLVM's string representation of parameter types
+// and concatenates them with '_'. There are non-alphanumeric characters in the
+// result, but llc is OK with them, and we need to postprocess these names
+// after the lowering phase anyway.
+static std::string getSignature(FunctionType *FTy) {
+  std::string Sig;
+  raw_string_ostream OS(Sig);
+  OS << *FTy->getReturnType();
+  for (Type *ParamTy : FTy->params())
+    OS << "_" << *ParamTy;
+  if (FTy->isVarArg())
+    OS << "_...";
+  Sig = OS.str();
+  Sig.erase(remove_if(Sig, isspace), Sig.end());
+  // When s2wasm parses a .s file, a comma means the end of an argument. So a
+  // mangled function name can contain any character but a comma.
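+  // For example, "{ i32, i32 } (i8*)" would otherwise mangle to
+  // "{i32,i32}_i8*", so the comma is rewritten to a dot below.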
+  std::replace(Sig.begin(), Sig.end(), ',', '.');
+  return Sig;
+}
+
+// Returns the __cxa_find_matching_catch_N function, where N = NumClauses + 2.
+// This is because a landingpad instruction contains two more arguments, a
+// personality function and a cleanup bit, and __cxa_find_matching_catch_N
+// functions are named after the number of arguments in the original
+// landingpad instruction.
+Function *
+WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
+                                                       unsigned NumClauses) {
+  if (FindMatchingCatches.count(NumClauses))
+    return FindMatchingCatches[NumClauses];
+  PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+  SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
+  FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
+  Function *F = Function::Create(
+      FTy, GlobalValue::ExternalLinkage,
+      "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M);
+  FindMatchingCatches[NumClauses] = F;
+  return F;
+}
+
+// Generate an invoke wrapper sequence with preamble and postamble.
+// Preamble:
+//   __THREW__ = 0;
+// Postamble:
+//   %__THREW__.val = __THREW__; __THREW__ = 0;
+// Returns %__THREW__.val, which indicates whether an exception is thrown (or
+// whether a longjmp occurred), for future use.
+template <typename CallOrInvoke>
+Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
+  LLVMContext &C = CI->getModule()->getContext();
+
+  // If we are calling a function that is noreturn, we must remove that
+  // attribute. The code we insert here does expect it to return, after we
+  // catch the exception.
+  if (CI->doesNotReturn()) {
+    if (auto *F = dyn_cast<Function>(CI->getCalledValue()))
+      F->removeFnAttr(Attribute::NoReturn);
+    CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
+  }
+
+  IRBuilder<> IRB(C);
+  IRB.SetInsertPoint(CI);
+
+  // Pre-invoke
+  // __THREW__ = 0;
+  IRB.CreateStore(IRB.getInt32(0), ThrewGV);
+
+  // Invoke function wrapper in JavaScript
+  SmallVector<Value *, 16> Args;
+  // Put the pointer to the callee as the first argument, so it can be called
+  // within the invoke wrapper later.
+  Args.push_back(CI->getCalledValue());
+  Args.append(CI->arg_begin(), CI->arg_end());
+  CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args);
+  NewCall->takeName(CI);
+  NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke);
+  NewCall->setDebugLoc(CI->getDebugLoc());
+
+  // Because we added the pointer to the callee as the first argument, all
+  // argument attribute indices have to be incremented by one.
+  SmallVector<AttributeSet, 8> ArgAttributes;
+  const AttributeList &InvokeAL = CI->getAttributes();
+
+  // No attributes for the callee pointer.
+  ArgAttributes.push_back(AttributeSet());
+  // Copy the argument attributes from the original
+  for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
+    ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
+
+  AttrBuilder FnAttrs(InvokeAL.getFnAttributes());
+  if (FnAttrs.contains(Attribute::AllocSize)) {
+    // The allocsize attribute (if any) refers to parameters by index and
+    // needs to be adjusted.
+    unsigned SizeArg;
+    Optional<unsigned> NEltArg;
+    std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs();
+    SizeArg += 1;
+    if (NEltArg.hasValue())
+      NEltArg = NEltArg.getValue() + 1;
+    FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
+  }
+
+  // Reconstruct the AttributeList based on the vector we constructed.
+ AttributeList NewCallAL = + AttributeList::get(C, AttributeSet::get(C, FnAttrs), + InvokeAL.getRetAttributes(), ArgAttributes); + NewCall->setAttributes(NewCallAL); + + CI->replaceAllUsesWith(NewCall); + + // Post-invoke + // %__THREW__.val = __THREW__; __THREW__ = 0; + Value *Threw = + IRB.CreateLoad(IRB.getInt32Ty(), ThrewGV, ThrewGV->getName() + ".val"); + IRB.CreateStore(IRB.getInt32(0), ThrewGV); + return Threw; +} + +// Get matching invoke wrapper based on callee signature +template <typename CallOrInvoke> +Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { + Module *M = CI->getModule(); + SmallVector<Type *, 16> ArgTys; + Value *Callee = CI->getCalledValue(); + FunctionType *CalleeFTy; + if (auto *F = dyn_cast<Function>(Callee)) + CalleeFTy = F->getFunctionType(); + else { + auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType(); + CalleeFTy = dyn_cast<FunctionType>(CalleeTy); + } + + std::string Sig = getSignature(CalleeFTy); + if (InvokeWrappers.find(Sig) != InvokeWrappers.end()) + return InvokeWrappers[Sig]; + + // Put the pointer to the callee as first argument + ArgTys.push_back(PointerType::getUnqual(CalleeFTy)); + // Add argument types + ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end()); + + FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, + CalleeFTy->isVarArg()); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M); + InvokeWrappers[Sig] = F; + return F; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M, + const Value *Callee) const { + if (auto *CalleeF = dyn_cast<Function>(Callee)) + if (CalleeF->isIntrinsic()) + return false; + + // Attempting to transform inline assembly will result in something like: + // call void @__invoke_void(void ()* asm ...) + // which is invalid because inline assembly blocks do not have addresses + // and can't be passed by pointer. The result is a crash with illegal IR. + if (isa<InlineAsm>(Callee)) + return false; + StringRef CalleeName = Callee->getName(); + + // The reason we include malloc/free here is to exclude the malloc/free + // calls generated in setjmp prep / cleanup routines. + if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free") + return false; + + // There are functions in JS glue code + if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" || + CalleeName == "saveSetjmp" || CalleeName == "testSetjmp" || + CalleeName == "getTempRet0" || CalleeName == "setTempRet0") + return false; + + // __cxa_find_matching_catch_N functions cannot longjmp + if (Callee->getName().startswith("__cxa_find_matching_catch_")) + return false; + + // Exception-catching related functions + if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_end_catch" || + CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || + CalleeName == "__clang_call_terminate") + return false; + + // Otherwise we don't know + return true; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M, + const Value *Callee) const { + StringRef CalleeName = Callee->getName(); + // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>. 
+  return CalleeName == "emscripten_asm_const_int" ||
+         CalleeName == "emscripten_asm_const_double" ||
+         CalleeName == "emscripten_asm_const_int_sync_on_main_thread" ||
+         CalleeName == "emscripten_asm_const_double_sync_on_main_thread" ||
+         CalleeName == "emscripten_asm_const_async_on_main_thread";
+}
+
+// Generate a testSetjmp function call sequence with preamble and postamble.
+// The code this generates is equivalent to the following JavaScript code:
+// if (%__THREW__.val != 0 & threwValue != 0) {
+//   %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
+//   if (%label == 0)
+//     emscripten_longjmp(%__THREW__.val, threwValue);
+//   setTempRet0(threwValue);
+// } else {
+//   %label = -1;
+// }
+// %longjmp_result = getTempRet0();
+//
+// As output parameters, it returns %label, %longjmp_result, and the BB that
+// the last instruction (%longjmp_result = ...) is in.
+void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
+    BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable,
+    Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult,
+    BasicBlock *&EndBB) {
+  Function *F = BB->getParent();
+  LLVMContext &C = BB->getModule()->getContext();
+  IRBuilder<> IRB(C);
+  IRB.SetInsertPoint(InsertPt);
+
+  // if (%__THREW__.val != 0 & threwValue != 0)
+  IRB.SetInsertPoint(BB);
+  BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F);
+  BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
+  BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
+  Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0));
+  Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
+                                     ThrewValueGV->getName() + ".val");
+  Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
+  Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
+  IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
+
+  // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize);
+  // if (%label == 0)
+  IRB.SetInsertPoint(ThenBB1);
+  BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F);
+  BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
+  Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C),
+                                       Threw->getName() + ".i32p");
+  Value *LoadedThrew = IRB.CreateLoad(IRB.getInt32Ty(), ThrewInt,
+                                      ThrewInt->getName() + ".loaded");
+  Value *ThenLabel = IRB.CreateCall(
+      TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
+  Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
+  IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2);
+
+  // emscripten_longjmp(%__THREW__.val, threwValue);
+  IRB.SetInsertPoint(ThenBB2);
+  IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
+  IRB.CreateUnreachable();
+
+  // setTempRet0(threwValue);
+  IRB.SetInsertPoint(EndBB2);
+  IRB.CreateCall(SetTempRet0Func, ThrewValue);
+  IRB.CreateBr(EndBB1);
+
+  IRB.SetInsertPoint(ElseBB1);
+  IRB.CreateBr(EndBB1);
+
+  // longjmp_result = getTempRet0();
+  IRB.SetInsertPoint(EndBB1);
+  PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
+  LabelPHI->addIncoming(ThenLabel, EndBB2);
+  LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1);
+
+  // Output parameter assignment
+  Label = LabelPHI;
+  EndBB = EndBB1;
+  LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
+}
+
+void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+  DT.recalculate(F); // CFG has been changed
+  SSAUpdater SSA;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
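+      // Make I's value available in its defining block, then rewrite any use
+      // that I no longer dominates after the CFG changes above.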
SSA.Initialize(I.getType(), I.getName()); + SSA.AddAvailableValue(&BB, &I); + for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) { + Use &U = *UI; + ++UI; + auto *User = cast<Instruction>(U.getUser()); + if (auto *UserPN = dyn_cast<PHINode>(User)) + if (UserPN->getIncomingBlock(U) == &BB) + continue; + + if (DT.dominates(&I, User)) + continue; + SSA.RewriteUseAfterInsertions(U); + } + } + } +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { + LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n"); + + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + + Function *SetjmpF = M.getFunction("setjmp"); + Function *LongjmpF = M.getFunction("longjmp"); + bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty(); + bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); + bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed); + + // Declare (or get) global variables __THREW__, __threwValue, and + // getTempRet0/setTempRet0 function which are used in common for both + // exception handling and setjmp/longjmp handling + ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__"); + ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue"); + GetTempRet0Func = + Function::Create(FunctionType::get(IRB.getInt32Ty(), false), + GlobalValue::ExternalLinkage, "getTempRet0", &M); + SetTempRet0Func = Function::Create( + FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false), + GlobalValue::ExternalLinkage, "setTempRet0", &M); + GetTempRet0Func->setDoesNotThrow(); + SetTempRet0Func->setDoesNotThrow(); + + bool Changed = false; + + // Exception handling + if (EnableEH) { + // Register __resumeException function + FunctionType *ResumeFTy = + FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); + ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, + "__resumeException", &M); + + // Register llvm_eh_typeid_for function + FunctionType *EHTypeIDTy = + FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); + EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, + "llvm_eh_typeid_for", &M); + + for (Function &F : M) { + if (F.isDeclaration()) + continue; + Changed |= runEHOnFunction(F); + } + } + + // Setjmp/longjmp handling + if (DoSjLj) { + Changed = true; // We have setjmp or longjmp somewhere + + if (LongjmpF) { + // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is + // defined in JS code + EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), + GlobalValue::ExternalLinkage, + "emscripten_longjmp_jmpbuf", &M); + + LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); + } + + if (SetjmpF) { + // Register saveSetjmp function + FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); + SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0), + IRB.getInt32Ty(), Type::getInt32PtrTy(C), + IRB.getInt32Ty()}; + FunctionType *FTy = + FunctionType::get(Type::getInt32PtrTy(C), Params, false); + SaveSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M); + + // Register testSetjmp function + Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; + FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); + TestSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M); + + FTy = FunctionType::get(IRB.getVoidTy(), + {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); + EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, + "emscripten_longjmp", &M); + + // Only traverse functions that uses setjmp in order not to insert + // 
unnecessary prep / cleanup code in every function + SmallPtrSet<Function *, 8> SetjmpUsers; + for (User *U : SetjmpF->users()) { + auto *UI = cast<Instruction>(U); + SetjmpUsers.insert(UI->getFunction()); + } + for (Function *F : SetjmpUsers) + runSjLjOnFunction(*F); + } + } + + if (!Changed) { + // Delete unused global variables and functions + if (ResumeF) + ResumeF->eraseFromParent(); + if (EHTypeIDF) + EHTypeIDF->eraseFromParent(); + if (EmLongjmpF) + EmLongjmpF->eraseFromParent(); + if (SaveSetjmpF) + SaveSetjmpF->eraseFromParent(); + if (TestSetjmpF) + TestSetjmpF->eraseFromParent(); + return false; + } + + return true; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { + Module &M = *F.getParent(); + LLVMContext &C = F.getContext(); + IRBuilder<> IRB(C); + bool Changed = false; + SmallVector<Instruction *, 64> ToErase; + SmallPtrSet<LandingPadInst *, 32> LandingPads; + bool AllowExceptions = + areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName()); + + for (BasicBlock &BB : F) { + auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + Changed = true; + LandingPads.insert(II->getLandingPadInst()); + IRB.SetInsertPoint(II); + + bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue()); + if (NeedInvoke) { + // Wrap invoke with invoke wrapper and generate preamble/postamble + Value *Threw = wrapInvoke(II); + ToErase.push_back(II); + + // Insert a branch based on __THREW__ variable + Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(1), "cmp"); + IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest()); + + } else { + // This can't throw, and we don't need this invoke, just replace it with a + // call+branch + SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end()); + CallInst *NewCall = + IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setDebugLoc(II->getDebugLoc()); + NewCall->setAttributes(II->getAttributes()); + II->replaceAllUsesWith(NewCall); + ToErase.push_back(II); + + IRB.CreateBr(II->getNormalDest()); + + // Remove any PHI node entries from the exception destination + II->getUnwindDest()->removePredecessor(&BB); + } + } + + // Process resume instructions + for (BasicBlock &BB : F) { + // Scan the body of the basic block for resumes + for (Instruction &I : BB) { + auto *RI = dyn_cast<ResumeInst>(&I); + if (!RI) + continue; + + // Split the input into legal values + Value *Input = RI->getValue(); + IRB.SetInsertPoint(RI); + Value *Low = IRB.CreateExtractValue(Input, 0, "low"); + // Create a call to __resumeException function + IRB.CreateCall(ResumeF, {Low}); + // Add a terminator to the block + IRB.CreateUnreachable(); + ToErase.push_back(RI); + } + } + + // Process llvm.eh.typeid.for intrinsics + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + auto *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + const Function *Callee = CI->getCalledFunction(); + if (!Callee) + continue; + if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) + continue; + + IRB.SetInsertPoint(CI); + CallInst *NewCI = + IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid"); + CI->replaceAllUsesWith(NewCI); + ToErase.push_back(CI); + } + } + + // Look for orphan landingpads, can occur in blocks with no predecessors + for (BasicBlock &BB : F) { + Instruction *I = BB.getFirstNonPHI(); + if (auto *LPI = dyn_cast<LandingPadInst>(I)) + LandingPads.insert(LPI); + } + + // Handle all the landingpad for this 
function together, as multiple invokes + // may share a single lp + for (LandingPadInst *LPI : LandingPads) { + IRB.SetInsertPoint(LPI); + SmallVector<Value *, 16> FMCArgs; + for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { + Constant *Clause = LPI->getClause(I); + // As a temporary workaround for the lack of aggregate varargs support + // in the interface between JS and wasm, break out filter operands into + // their component elements. + if (LPI->isFilter(I)) { + auto *ATy = cast<ArrayType>(Clause->getType()); + for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) { + Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter"); + FMCArgs.push_back(EV); + } + } else + FMCArgs.push_back(Clause); + } + + // Create a call to __cxa_find_matching_catch_N function + Function *FMCF = getFindMatchingCatch(M, FMCArgs.size()); + CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc"); + Value *Undef = UndefValue::get(LPI->getType()); + Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0"); + Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0"); + Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1"); + + LPI->replaceAllUsesWith(Pair1); + ToErase.push_back(LPI); + } + + // Erase everything we no longer need in this function + for (Instruction *I : ToErase) + I->eraseFromParent(); + + return Changed; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { + Module &M = *F.getParent(); + LLVMContext &C = F.getContext(); + IRBuilder<> IRB(C); + SmallVector<Instruction *, 64> ToErase; + // Vector of %setjmpTable values + std::vector<Instruction *> SetjmpTableInsts; + // Vector of %setjmpTableSize values + std::vector<Instruction *> SetjmpTableSizeInsts; + + // Setjmp preparation + + // This instruction effectively means %setjmpTableSize = 4. + // We create this as an instruction intentionally, and we don't want to fold + // this instruction to a constant 4, because this value will be used in + // SSAUpdater.AddAvailableValue(...) later. + BasicBlock &EntryBB = F.getEntryBlock(); + BinaryOperator *SetjmpTableSize = BinaryOperator::Create( + Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize", + &*EntryBB.getFirstInsertionPt()); + // setjmpTable = (int *) malloc(40); + Instruction *SetjmpTable = CallInst::CreateMalloc( + SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40), + nullptr, nullptr, "setjmpTable"); + // setjmpTable[0] = 0; + IRB.SetInsertPoint(SetjmpTableSize); + IRB.CreateStore(IRB.getInt32(0), SetjmpTable); + SetjmpTableInsts.push_back(SetjmpTable); + SetjmpTableSizeInsts.push_back(SetjmpTableSize); + + // Setjmp transformation + std::vector<PHINode *> SetjmpRetPHIs; + Function *SetjmpF = M.getFunction("setjmp"); + for (User *U : SetjmpF->users()) { + auto *CI = dyn_cast<CallInst>(U); + if (!CI) + report_fatal_error("Does not support indirect calls to setjmp"); + + BasicBlock *BB = CI->getParent(); + if (BB->getParent() != &F) // in other function + continue; + + // The tail is everything right after the call, and will be reached once + // when setjmp is called, and later when longjmp returns to the setjmp + BasicBlock *Tail = SplitBlock(BB, CI->getNextNode()); + // Add a phi to the tail, which will be the output of setjmp, which + // indicates if this is the first call or a longjmp back. 
The phi directly + // uses the right value based on where we arrive from + IRB.SetInsertPoint(Tail->getFirstNonPHI()); + PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret"); + + // setjmp initial call returns 0 + SetjmpRet->addIncoming(IRB.getInt32(0), BB); + // The proper output is now this, not the setjmp call itself + CI->replaceAllUsesWith(SetjmpRet); + // longjmp returns to the setjmp will add themselves to this phi + SetjmpRetPHIs.push_back(SetjmpRet); + + // Fix call target + // Our index in the function is our place in the array + 1 to avoid index + // 0, because index 0 means the longjmp is not ours to handle. + IRB.SetInsertPoint(CI); + Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()), + SetjmpTable, SetjmpTableSize}; + Instruction *NewSetjmpTable = + IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable"); + Instruction *NewSetjmpTableSize = + IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize"); + SetjmpTableInsts.push_back(NewSetjmpTable); + SetjmpTableSizeInsts.push_back(NewSetjmpTableSize); + ToErase.push_back(CI); + } + + // Update each call that can longjmp so it can return to a setjmp where + // relevant. + + // Because we are creating new BBs while processing and don't want to make + // all these newly created BBs candidates again for longjmp processing, we + // first make the vector of candidate BBs. + std::vector<BasicBlock *> BBs; + for (BasicBlock &BB : F) + BBs.push_back(&BB); + + // BBs.size() will change within the loop, so we query it every time + for (unsigned I = 0; I < BBs.size(); I++) { + BasicBlock *BB = BBs[I]; + for (Instruction &I : *BB) { + assert(!isa<InvokeInst>(&I)); + auto *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + const Value *Callee = CI->getCalledValue(); + if (!canLongjmp(M, Callee)) + continue; + if (isEmAsmCall(M, Callee)) + report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " + + F.getName() + + ". 
Please consider using EM_JS, or move the " + "EM_ASM into another function.", + false); + + Value *Threw = nullptr; + BasicBlock *Tail; + if (Callee->getName().startswith("__invoke_")) { + // If invoke wrapper has already been generated for this call in + // previous EH phase, search for the load instruction + // %__THREW__.val = __THREW__; + // in postamble after the invoke wrapper call + LoadInst *ThrewLI = nullptr; + StoreInst *ThrewResetSI = nullptr; + for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end(); + I != IE; ++I) { + if (auto *LI = dyn_cast<LoadInst>(I)) + if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) + if (GV == ThrewGV) { + Threw = ThrewLI = LI; + break; + } + } + // Search for the store instruction after the load above + // __THREW__ = 0; + for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end(); + I != IE; ++I) { + if (auto *SI = dyn_cast<StoreInst>(I)) + if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand())) + if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) { + ThrewResetSI = SI; + break; + } + } + assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke"); + assert(ThrewResetSI && "Cannot find __THREW__ store after invoke"); + Tail = SplitBlock(BB, ThrewResetSI->getNextNode()); + + } else { + // Wrap call with invoke wrapper and generate preamble/postamble + Threw = wrapInvoke(CI); + ToErase.push_back(CI); + Tail = SplitBlock(BB, CI->getNextNode()); + } + + // We need to replace the terminator in Tail - SplitBlock makes BB go + // straight to Tail, we need to check if a longjmp occurred, and go to the + // right setjmp-tail if so + ToErase.push_back(BB->getTerminator()); + + // Generate a function call to testSetjmp function and preamble/postamble + // code to figure out (1) whether longjmp occurred (2) if longjmp + // occurred, which setjmp it corresponds to + Value *Label = nullptr; + Value *LongjmpResult = nullptr; + BasicBlock *EndBB = nullptr; + wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label, + LongjmpResult, EndBB); + assert(Label && LongjmpResult && EndBB); + + // Create switch instruction + IRB.SetInsertPoint(EndBB); + SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size()); + // -1 means no longjmp happened, continue normally (will hit the default + // switch case). 0 means a longjmp that is not ours to handle, needs a + // rethrow. Otherwise the index is the same as the index in P+1 (to avoid + // 0). + for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { + SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent()); + SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB); + } + + // We are splitting the block here, and must continue to find other calls + // in the block - which is now split. so continue to traverse in the Tail + BBs.push_back(Tail); + } + } + + // Erase everything we no longer need in this function + for (Instruction *I : ToErase) + I->eraseFromParent(); + + // Free setjmpTable buffer before each return instruction + for (BasicBlock &BB : F) { + Instruction *TI = BB.getTerminator(); + if (isa<ReturnInst>(TI)) + CallInst::CreateFree(SetjmpTable, TI); + } + + // Every call to saveSetjmp can change setjmpTable and setjmpTableSize + // (when buffer reallocation occurs) + // entry: + // setjmpTableSize = 4; + // setjmpTable = (int *) malloc(40); + // setjmpTable[0] = 0; + // ... 
+  //   somebb:
+  //     setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+  //     setjmpTableSize = getTempRet0();
+  // So we need to make sure the SSA for these variables is valid so that
+  // every saveSetjmp and testSetjmp call has the correct arguments.
+  SSAUpdater SetjmpTableSSA;
+  SSAUpdater SetjmpTableSizeSSA;
+  SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
+  SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
+  for (Instruction *I : SetjmpTableInsts)
+    SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
+  for (Instruction *I : SetjmpTableSizeInsts)
+    SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+
+  for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
+       UI != UE;) {
+    // Grab the use before incrementing the iterator.
+    Use &U = *UI;
+    // Increment the iterator before removing the use from the list.
+    ++UI;
+    if (auto *I = dyn_cast<Instruction>(U.getUser()))
+      if (I->getParent() != &EntryBB)
+        SetjmpTableSSA.RewriteUse(U);
+  }
+  for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
+       UI != UE;) {
+    Use &U = *UI;
+    ++UI;
+    if (auto *I = dyn_cast<Instruction>(U.getUser()))
+      if (I->getParent() != &EntryBB)
+        SetjmpTableSizeSSA.RewriteUse(U);
+  }
+
+  // Finally, our modifications to the CFG can break dominance of SSA
+  // variables. For example, consider this code:
+  //   if (x()) { .. setjmp() .. }
+  //   if (y()) { .. longjmp() .. }
+  // We must split the longjmp block, and it can jump into the block split
+  // from the setjmp one. But that means that when we split the setjmp block,
+  // its first part no longer dominates its second part - there is a
+  // theoretically possible control flow path where x() is false, then y() is
+  // true and we reach the second part of the setjmp block, without ever
+  // reaching the first part. So, we rebuild SSA form here.
+  rebuildSSA(F);
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
new file mode 100644
index 000000000000..750b2233e67a
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -0,0 +1,190 @@
+//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Lower @llvm.global_dtors.
+///
+/// WebAssembly doesn't have a builtin way to invoke static destructors.
+/// Implement @llvm.global_dtors by creating wrapper functions that are
+/// registered in @llvm.global_ctors and which contain a call to
+/// `__cxa_atexit` to register their destructor functions.
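+///
+/// A sketch of the code this generates (names are illustrative):
+///
+///   static void call_dtors_N(void *) { dtorA(); dtorB(); }
+///   static void register_call_dtors_N(void) {
+///     if (__cxa_atexit(&call_dtors_N, nullptr, &__dso_handle) != 0)
+///       __builtin_trap(); // registration failed, e.g. out of memory
+///   }
+///
+/// where register_call_dtors_N is appended to @llvm.global_ctors.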
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-global-dtors"
+
+namespace {
+class LowerGlobalDtors final : public ModulePass {
+  StringRef getPassName() const override {
+    return "WebAssembly Lower @llvm.global_dtors";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  LowerGlobalDtors() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char LowerGlobalDtors::ID = 0;
+INITIALIZE_PASS(LowerGlobalDtors, DEBUG_TYPE,
+                "Lower @llvm.global_dtors for WebAssembly", false, false)
+
+ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
+  return new LowerGlobalDtors();
+}
+
+bool LowerGlobalDtors::runOnModule(Module &M) {
+  LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
+
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
+  if (!GV || !GV->hasInitializer())
+    return false;
+
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!InitList)
+    return false;
+
+  // Sanity-check @llvm.global_dtors' type.
+  auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+  if (!ETy || ETy->getNumElements() != 3 ||
+      !ETy->getTypeAtIndex(0U)->isIntegerTy() ||
+      !ETy->getTypeAtIndex(1U)->isPointerTy() ||
+      !ETy->getTypeAtIndex(2U)->isPointerTy())
+    return false; // Not (int, ptr, ptr).
+
+  // Collect the contents of @llvm.global_dtors, collated by priority and
+  // associated symbol.
+  std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
+  for (Value *O : InitList->operands()) {
+    auto *CS = dyn_cast<ConstantStruct>(O);
+    if (!CS)
+      continue; // Malformed.
+
+    auto *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+    if (!Priority)
+      continue; // Malformed.
+    uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
+
+    Constant *DtorFunc = CS->getOperand(1);
+    if (DtorFunc->isNullValue())
+      break; // Found a null terminator, skip the rest.
+
+    Constant *Associated = CS->getOperand(2);
+    Associated = cast<Constant>(Associated->stripPointerCasts());
+
+    DtorFuncs[PriorityValue][Associated].push_back(DtorFunc);
+  }
+  if (DtorFuncs.empty())
+    return false;
+
+  // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
+  LLVMContext &C = M.getContext();
+  PointerType *VoidStar = Type::getInt8PtrTy(C);
+  Type *AtExitFuncArgs[] = {VoidStar};
+  FunctionType *AtExitFuncTy =
+      FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
+                        /*isVarArg=*/false);
+
+  FunctionCallee AtExit = M.getOrInsertFunction(
+      "__cxa_atexit",
+      FunctionType::get(Type::getInt32Ty(C),
+                        {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
+                        /*isVarArg=*/false));
+
+  // Declare __dso_handle.
+ Constant *DsoHandle = M.getNamedValue("__dso_handle"); + if (!DsoHandle) { + Type *DsoHandleTy = Type::getInt8Ty(C); + GlobalVariable *Handle = new GlobalVariable( + M, DsoHandleTy, /*isConstant=*/true, + GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle"); + Handle->setVisibility(GlobalVariable::HiddenVisibility); + DsoHandle = Handle; + } + + // For each unique priority level and associated symbol, generate a function + // to call all the destructors at that level, and a function to register the + // first function with __cxa_atexit. + for (auto &PriorityAndMore : DtorFuncs) { + uint16_t Priority = PriorityAndMore.first; + for (auto &AssociatedAndMore : PriorityAndMore.second) { + Constant *Associated = AssociatedAndMore.first; + + Function *CallDtors = Function::Create( + AtExitFuncTy, Function::PrivateLinkage, + "call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); + FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), + /*isVarArg=*/false); + + for (auto Dtor : AssociatedAndMore.second) + CallInst::Create(VoidVoid, Dtor, "", BB); + ReturnInst::Create(C, BB); + + Function *RegisterCallDtors = Function::Create( + VoidVoid, Function::PrivateLinkage, + "register_call_dtors" + + (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority)) + : Twine()) + + (!Associated->isNullValue() ? (Twine(".") + Associated->getName()) + : Twine()), + &M); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); + BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); + BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); + + Value *Null = ConstantPointerNull::get(VoidStar); + Value *Args[] = {CallDtors, Null, DsoHandle}; + Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); + Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, + Constant::getNullValue(Res->getType())); + BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); + + // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. + // This should be very rare, because if the process is running out of + // memory before main has even started, something is wrong. + CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "", + FailBB); + new UnreachableInst(C, FailBB); + + ReturnInst::Create(C, RetBB); + + // Now register the registration function with @llvm.global_ctors. + appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); + } + } + + // Now that we've lowered everything, remove @llvm.global_dtors. + GV->eraseFromParent(); + + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp new file mode 100644 index 000000000000..59c10243c545 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -0,0 +1,341 @@ +// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst // +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains code to lower WebAssembly MachineInstrs to their +/// corresponding MCInst records. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyAsmPrinter.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyRuntimeLibcallSignatures.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to +// stack instructions +#define GET_INSTRMAP_INFO 1 +#include "WebAssemblyGenInstrInfo.inc" + +// This disables the removal of registers when lowering into MC, as required +// by some current tests. +cl::opt<bool> + WasmKeepRegisters("wasm-keep-registers", cl::Hidden, + cl::desc("WebAssembly: output stack registers in" + " instruction output for test purposes only."), + cl::init(false)); + +static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI); + +MCSymbol * +WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + const GlobalValue *Global = MO.getGlobal(); + auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global)); + + if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) { + const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); + const TargetMachine &TM = MF.getTarget(); + const Function &CurrentFunc = MF.getFunction(); + + SmallVector<MVT, 1> ResultMVTs; + SmallVector<MVT, 4> ParamMVTs; + computeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs); + + auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs); + WasmSym->setSignature(Signature.get()); + Printer.addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + } + + return WasmSym; +} + +MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( + const MachineOperand &MO) const { + const char *Name = MO.getSymbolName(); + auto *WasmSym = cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name)); + const WebAssemblySubtarget &Subtarget = Printer.getSubtarget(); + + // Except for certain known symbols, all symbols used by CodeGen are + // functions. It's OK to hardcode knowledge of specific symbols here; this + // method is precisely there for fetching the signatures of known + // Clang-provided symbols. + if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || + strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || + strcmp(Name, "__tls_size") == 0 || strcmp(Name, "__tls_align") == 0) { + bool Mutable = + strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + WasmSym->setGlobalType(wasm::WasmGlobalType{ + uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 + : wasm::WASM_TYPE_I32), + Mutable}); + return WasmSym; + } + + SmallVector<wasm::ValType, 4> Returns; + SmallVector<wasm::ValType, 4> Params; + if (strcmp(Name, "__cpp_exception") == 0) { + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT); + // We can't confirm its signature index for now because there can be + // imported exceptions. Set it to be 0 for now. 
+    WasmSym->setEventType(
+        {wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
+    // We may have multiple C++ compilation units to be linked together, each of
+    // which defines the exception symbol. To resolve them, we declare them as
+    // weak.
+    WasmSym->setWeak(true);
+    WasmSym->setExternal(true);
+
+    // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
+    // (for wasm64) param type and void return type. The reason is that all C++
+    // exception values are pointers, and to share the type section with
+    // functions, exceptions are assumed to have void return type.
+    Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
+                                           : wasm::ValType::I32);
+  } else { // Function symbols
+    WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+    getLibcallSignature(Subtarget, Name, Returns, Params);
+  }
+  auto Signature =
+      std::make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
+  WasmSym->setSignature(Signature.get());
+  Printer.addSignature(std::move(Signature));
+
+  return WasmSym;
+}
+
+MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+                                                     MCSymbol *Sym) const {
+  MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+  unsigned TargetFlags = MO.getTargetFlags();
+
+  switch (TargetFlags) {
+  case WebAssemblyII::MO_NO_FLAG:
+    break;
+  case WebAssemblyII::MO_GOT:
+    Kind = MCSymbolRefExpr::VK_GOT;
+    break;
+  case WebAssemblyII::MO_MEMORY_BASE_REL:
+    Kind = MCSymbolRefExpr::VK_WASM_MBREL;
+    break;
+  case WebAssemblyII::MO_TABLE_BASE_REL:
+    Kind = MCSymbolRefExpr::VK_WASM_TBREL;
+    break;
+  default:
+    llvm_unreachable("Unknown target flag on GV operand");
+  }
+
+  const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Kind, Ctx);
+
+  if (MO.getOffset() != 0) {
+    const auto *WasmSym = cast<MCSymbolWasm>(Sym);
+    if (TargetFlags == WebAssemblyII::MO_GOT)
+      report_fatal_error("GOT symbol references do not support offsets");
+    if (WasmSym->isFunction())
+      report_fatal_error("Function addresses with offsets not supported");
+    if (WasmSym->isGlobal())
+      report_fatal_error("Global indexes with offsets not supported");
+    if (WasmSym->isEvent())
+      report_fatal_error("Event indexes with offsets not supported");
+
+    Expr = MCBinaryExpr::createAdd(
+        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+  }
+
+  return MCOperand::createExpr(Expr);
+}
+
+MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand(
+    SmallVector<wasm::ValType, 1> &&Returns,
+    SmallVector<wasm::ValType, 4> &&Params) const {
+  auto Signature = std::make_unique<wasm::WasmSignature>(std::move(Returns),
+                                                         std::move(Params));
+  MCSymbol *Sym = Printer.createTempSymbol("typeindex");
+  auto *WasmSym = cast<MCSymbolWasm>(Sym);
+  WasmSym->setSignature(Signature.get());
+  Printer.addSignature(std::move(Signature));
+  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+  const MCExpr *Expr =
+      MCSymbolRefExpr::create(WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
+  return MCOperand::createExpr(Expr);
+}
+
+// Return the WebAssembly type associated with the given register class.
+static wasm::ValType getType(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return wasm::ValType::I32; + if (RC == &WebAssembly::I64RegClass) + return wasm::ValType::I64; + if (RC == &WebAssembly::F32RegClass) + return wasm::ValType::F32; + if (RC == &WebAssembly::F64RegClass) + return wasm::ValType::F64; + if (RC == &WebAssembly::V128RegClass) + return wasm::ValType::V128; + llvm_unreachable("Unexpected register class"); +} + +static void getFunctionReturns(const MachineInstr *MI, + SmallVectorImpl<wasm::ValType> &Returns) { + const Function &F = MI->getMF()->getFunction(); + const TargetMachine &TM = MI->getMF()->getTarget(); + Type *RetTy = F.getReturnType(); + SmallVector<MVT, 4> CallerRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + valTypesFromMVTs(CallerRetTys, Returns); +} + +void WebAssemblyMCInstLower::lower(const MachineInstr *MI, + MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + const MCInstrDesc &Desc = MI->getDesc(); + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->print(errs()); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_MachineBasicBlock: + MI->print(errs()); + llvm_unreachable("MachineBasicBlock operand should have been rewritten"); + case MachineOperand::MO_Register: { + // Ignore all implicit register operands. + if (MO.isImplicit()) + continue; + const WebAssemblyFunctionInfo &MFI = + *MI->getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>(); + unsigned WAReg = MFI.getWAReg(MO.getReg()); + MCOp = MCOperand::createReg(WAReg); + break; + } + case MachineOperand::MO_Immediate: + if (I < Desc.NumOperands) { + const MCOperandInfo &Info = Desc.OpInfo[I]; + if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { + SmallVector<wasm::ValType, 4> Returns; + SmallVector<wasm::ValType, 4> Params; + + const MachineRegisterInfo &MRI = + MI->getParent()->getParent()->getRegInfo(); + for (const MachineOperand &MO : MI->defs()) + Returns.push_back(getType(MRI.getRegClass(MO.getReg()))); + for (const MachineOperand &MO : MI->explicit_uses()) + if (MO.isReg()) + Params.push_back(getType(MRI.getRegClass(MO.getReg()))); + + // call_indirect instructions have a callee operand at the end which + // doesn't count as a param. + if (WebAssembly::isCallIndirect(MI->getOpcode())) + Params.pop_back(); + + // return_call_indirect instructions have the return type of the + // caller + if (MI->getOpcode() == WebAssembly::RET_CALL_INDIRECT) + getFunctionReturns(MI, Returns); + + MCOp = lowerTypeIndexOperand(std::move(Returns), std::move(Params)); + break; + } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { + auto BT = static_cast<WebAssembly::BlockType>(MO.getImm()); + assert(BT != WebAssembly::BlockType::Invalid); + if (BT == WebAssembly::BlockType::Multivalue) { + SmallVector<wasm::ValType, 1> Returns; + getFunctionReturns(MI, Returns); + MCOp = lowerTypeIndexOperand(std::move(Returns), + SmallVector<wasm::ValType, 4>()); + break; + } + } + } + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_FPImmediate: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. 
+    const ConstantFP *Imm = MO.getFPImm();
+    if (Imm->getType()->isFloatTy())
+      MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat());
+    else if (Imm->getType()->isDoubleTy())
+      MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble());
+    else
+      llvm_unreachable("unknown floating point immediate type");
+    break;
+  }
+  case MachineOperand::MO_GlobalAddress:
+    MCOp = lowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    // The target flag indicates whether this is a symbol for a
+    // variable or a function.
+    assert(MO.getTargetFlags() == 0 &&
+           "WebAssembly uses only symbol flags on ExternalSymbols");
+    MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+    break;
+  case MachineOperand::MO_MCSymbol:
+    // This is currently used only for LSDA symbols (GCC_except_table),
+    // because global addresses or other external symbols are handled above.
+    assert(MO.getTargetFlags() == 0 &&
+           "WebAssembly does not use target flags on MCSymbol");
+    MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
+    break;
+    }
+
+    OutMI.addOperand(MCOp);
+  }
+
+  if (!WasmKeepRegisters)
+    removeRegisterOperands(MI, OutMI);
+}
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
+  // Remove all uses of stackified registers to bring the instruction format
+  // into its final stack form used throughout MC, and transition opcodes to
+  // their _S variant.
+  // We do this separately from the above code that still may need these
+  // registers for e.g. call_indirect signatures.
+  // See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
+  // details.
+  // TODO: the code above creates new registers which are then removed here.
+  // That code could be slightly simplified by not doing that, though maybe
+  // it is simpler conceptually to keep the code above in "register mode"
+  // until this transition point.
+  // FIXME: we are not processing inline assembly, which contains register
+  // operands, because it is used by later target generic code.
+  if (MI->isDebugInstr() || MI->isLabel() || MI->isInlineAsm())
+    return;
+
+  // Transform to _S instruction.
+  auto RegOpcode = OutMI.getOpcode();
+  auto StackOpcode = WebAssembly::getStackOpcode(RegOpcode);
+  assert(StackOpcode != -1 && "Failed to stackify instruction");
+  OutMI.setOpcode(StackOpcode);
+
+  // Remove register operands.
+  for (auto I = OutMI.getNumOperands(); I; --I) {
+    auto &MO = OutMI.getOperand(I - 1);
+    if (MO.isReg()) {
+      OutMI.erase(&MO);
+    }
+  }
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
new file mode 100644
index 000000000000..d79c54097eb7
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -0,0 +1,47 @@
+//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the class to lower WebAssembly MachineInstrs to
+/// their corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class WebAssemblyAsmPrinter;
+class MCContext;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+
+/// This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
+  MCContext &Ctx;
+  WebAssemblyAsmPrinter &Printer;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+  MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+  MCOperand lowerTypeIndexOperand(SmallVector<wasm::ValType, 1> &&,
+                                  SmallVector<wasm::ValType, 4> &&) const;
+
+public:
+  WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
+      : Ctx(ctx), Printer(printer) {}
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..e4cc2389147b
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -0,0 +1,94 @@
+//=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements WebAssembly-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyISelLowering.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
+using namespace llvm;
+
+WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
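The WebAssemblyMCInstLower class declared above follows the usual per-target MCInstLower pattern: the asm printer constructs one and calls lower() on each MachineInstr it emits. A schematic sketch of that driver follows; the printer class name is hypothetical and the real WebAssembly asm printer does more (debug values, block annotations, and so on), but OutContext, OutStreamer, and EmitToStreamer are the standard AsmPrinter facilities.

```cpp
// Schematic AsmPrinter driver (assumed shape, not WebAssembly's exact code).
void MyWasmAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
  MCInst TmpInst;
  // Translates operands to MCOperands and, unless -wasm-keep-registers is
  // set, strips stackified register operands and switches to _S opcodes.
  MCInstLowering.lower(MI, TmpInst);
  EmitToStreamer(*OutStreamer, TmpInst);
}
```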
+
+void WebAssemblyFunctionInfo::initWARegs() {
+  assert(WARegs.empty());
+  unsigned Reg = UnusedReg;
+  WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
+}
+
+void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
+                                Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+  const DataLayout &DL(F.getParent()->getDataLayout());
+  const WebAssemblyTargetLowering &TLI =
+      *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+  SmallVector<EVT, 4> VTs;
+  ComputeValueVTs(TLI, DL, Ty, VTs);
+
+  for (EVT VT : VTs) {
+    unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
+    MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+    for (unsigned I = 0; I != NumRegs; ++I)
+      ValueVTs.push_back(RegisterVT);
+  }
+}
+
+void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
+                               const TargetMachine &TM,
+                               SmallVectorImpl<MVT> &Params,
+                               SmallVectorImpl<MVT> &Results) {
+  computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+
+  MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
+  if (Results.size() > 1 &&
+      !TM.getSubtarget<WebAssemblySubtarget>(F).hasMultivalue()) {
+    // WebAssembly can't lower returns of multiple values without demoting to
+    // sret unless multivalue is enabled (see
+    // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return
+    // values with a pointer parameter.
+    Results.clear();
+    Params.push_back(PtrVT);
+  }
+
+  for (auto *Param : Ty->params())
+    computeLegalValueVTs(F, TM, Param, Params);
+  if (Ty->isVarArg())
+    Params.push_back(PtrVT);
+}
+
+void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
+                            SmallVectorImpl<wasm::ValType> &Out) {
+  for (MVT Ty : In)
+    Out.push_back(WebAssembly::toValType(Ty));
+}
+
+std::unique_ptr<wasm::WasmSignature>
+llvm::signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+                        const SmallVectorImpl<MVT> &Params) {
+  auto Sig = std::make_unique<wasm::WasmSignature>();
+  valTypesFromMVTs(Results, Sig->Returns);
+  valTypesFromMVTs(Params, Sig->Params);
+  return Sig;
+}
+
+yaml::WebAssemblyFunctionInfo::WebAssemblyFunctionInfo(
+    const llvm::WebAssemblyFunctionInfo &MFI)
+    : CFGStackified(MFI.isCFGStackified()) {}
+
+void yaml::WebAssemblyFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+  MappingTraits<WebAssemblyFunctionInfo>::mapping(YamlIO, *this);
+}
+
+void WebAssemblyFunctionInfo::initializeBaseYamlFields(
+    const yaml::WebAssemblyFunctionInfo &YamlMFI) {
+  CFGStackified = YamlMFI.CFGStackified;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
new file mode 100644
index 000000000000..16e2f4392984
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -0,0 +1,177 @@
+// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares WebAssembly-specific per-machine-function
+/// information.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCSymbolWasm.h" + +namespace llvm { + +namespace yaml { +struct WebAssemblyFunctionInfo; +} + +/// This class is derived from MachineFunctionInfo and contains private +/// WebAssembly-specific information for each MachineFunction. +class WebAssemblyFunctionInfo final : public MachineFunctionInfo { + MachineFunction &MF; + + std::vector<MVT> Params; + std::vector<MVT> Results; + std::vector<MVT> Locals; + + /// A mapping from CodeGen vreg index to WebAssembly register number. + std::vector<unsigned> WARegs; + + /// A mapping from CodeGen vreg index to a boolean value indicating whether + /// the given register is considered to be "stackified", meaning it has been + /// determined or made to meet the stack requirements: + /// - single use (per path) + /// - single def (per path) + /// - defined and used in LIFO order with other stack registers + BitVector VRegStackified; + + // A virtual register holding the pointer to the vararg buffer for vararg + // functions. It is created and set in TLI::LowerFormalArguments and read by + // TLI::LowerVASTART + unsigned VarargVreg = -1U; + + // A virtual register holding the base pointer for functions that have + // overaligned values on the user stack. + unsigned BasePtrVreg = -1U; + + // Function properties. + bool CFGStackified = false; + +public: + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} + ~WebAssemblyFunctionInfo() override; + void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI); + + void addParam(MVT VT) { Params.push_back(VT); } + const std::vector<MVT> &getParams() const { return Params; } + + void addResult(MVT VT) { Results.push_back(VT); } + const std::vector<MVT> &getResults() const { return Results; } + + void clearParamsAndResults() { + Params.clear(); + Results.clear(); + } + + void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); } + void setLocal(size_t i, MVT VT) { Locals[i] = VT; } + void addLocal(MVT VT) { Locals.push_back(VT); } + const std::vector<MVT> &getLocals() const { return Locals; } + + unsigned getVarargBufferVreg() const { + assert(VarargVreg != -1U && "Vararg vreg hasn't been set"); + return VarargVreg; + } + void setVarargBufferVreg(unsigned Reg) { VarargVreg = Reg; } + + unsigned getBasePointerVreg() const { + assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set"); + return BasePtrVreg; + } + void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; } + + static const unsigned UnusedReg = -1u; + + void stackifyVReg(unsigned VReg) { + assert(MF.getRegInfo().getUniqueVRegDef(VReg)); + auto I = Register::virtReg2Index(VReg); + if (I >= VRegStackified.size()) + VRegStackified.resize(I + 1); + VRegStackified.set(I); + } + void unstackifyVReg(unsigned VReg) { + auto I = Register::virtReg2Index(VReg); + if (I < VRegStackified.size()) + VRegStackified.reset(I); + } + bool isVRegStackified(unsigned VReg) const { + auto I = Register::virtReg2Index(VReg); + if (I >= VRegStackified.size()) + return false; + return VRegStackified.test(I); + } + + void initWARegs(); + void setWAReg(unsigned VReg, unsigned WAReg) { + assert(WAReg != 
UnusedReg);
+    auto I = Register::virtReg2Index(VReg);
+    assert(I < WARegs.size());
+    WARegs[I] = WAReg;
+  }
+  unsigned getWAReg(unsigned VReg) const {
+    auto I = Register::virtReg2Index(VReg);
+    assert(I < WARegs.size());
+    return WARegs[I];
+  }
+
+  // For a given stackified WAReg, return the id number to print with push/pop.
+  static unsigned getWARegStackId(unsigned Reg) {
+    assert(Reg & INT32_MIN);
+    return Reg & INT32_MAX;
+  }
+
+  bool isCFGStackified() const { return CFGStackified; }
+  void setCFGStackified(bool Value = true) { CFGStackified = Value; }
+};
+
+void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+                          SmallVectorImpl<MVT> &ValueVTs);
+
+// Compute the signature for a given FunctionType (Ty). Note that it's not the
+// signature for F (F is just used to get various context).
+void computeSignatureVTs(const FunctionType *Ty, const Function &F,
+                         const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
+                         SmallVectorImpl<MVT> &Results);
+
+void valTypesFromMVTs(const ArrayRef<MVT> &In,
+                      SmallVectorImpl<wasm::ValType> &Out);
+
+std::unique_ptr<wasm::WasmSignature>
+signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+                  const SmallVectorImpl<MVT> &Params);
+
+namespace yaml {
+
+struct WebAssemblyFunctionInfo final : public yaml::MachineFunctionInfo {
+  bool CFGStackified = false;
+
+  WebAssemblyFunctionInfo() = default;
+  WebAssemblyFunctionInfo(const llvm::WebAssemblyFunctionInfo &MFI);
+
+  void mappingImpl(yaml::IO &YamlIO) override;
+  ~WebAssemblyFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<WebAssemblyFunctionInfo> {
+  static void mapping(IO &YamlIO, WebAssemblyFunctionInfo &MFI) {
+    YamlIO.mapOptional("isCFGStackified", MFI.CFGStackified, false);
+  }
+};
+
+} // end namespace yaml
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
new file mode 100644
index 000000000000..ac428fcc826a
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -0,0 +1,212 @@
+//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements an optimization pass using memory intrinsic results.
+///
+/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination
+/// address. They are in the form of
+///   %dst_new = call @memcpy %dst, %src, %len
+/// where %dst and %dst_new registers contain the same value.
+///
+/// This is to enable an optimization wherein uses of the %dst register used in
+/// the parameter can be replaced by uses of the %dst_new register used in the
+/// result, making the %dst register more likely to be single-use, thus more
+/// likely to be useful to register stackifying, and potentially also exposing
+/// the call instruction itself to register stackifying. These both can reduce
+/// local.get/local.set traffic.
+///
+/// The LLVM intrinsics for these return void so they can't use the returned
+/// attribute and consequently aren't handled by the OptimizeReturned pass.
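Schematically, and using the same notation as the file comment above, the rewrite this pass performs looks like the following. The load is an invented use, purely for illustration.

```
;; before: %dst has a second use after the call, so it is unlikely to be
;; stackifiable
%dst_new = call @memcpy %dst, %src, %len
%x = load %dst            ; use of %dst dominated by the call

;; after: the dominated use is redirected to the call's result
%dst_new = call @memcpy %dst, %src, %len
%x = load %dst_new        ; %dst is now single-use
```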
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-mem-intrinsic-results" + +namespace { +class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "WebAssembly Memory Intrinsic Results"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyMemIntrinsicResults::ID = 0; +INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE, + "Optimize memory intrinsic result values for WebAssembly", + false, false) + +FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() { + return new WebAssemblyMemIntrinsicResults(); +} + +// Replace uses of FromReg with ToReg if they are dominated by MI. +static bool replaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, + unsigned FromReg, unsigned ToReg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, + LiveIntervals &LIS) { + bool Changed = false; + + LiveInterval *FromLI = &LIS.getInterval(FromReg); + LiveInterval *ToLI = &LIS.getInterval(ToReg); + + SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot(); + VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx); + + SmallVector<SlotIndex, 4> Indices; + + for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); + I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + + // Check that MI dominates the instruction in the normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + + // If this use gets a different value, skip it. + SlotIndex WhereIdx = LIS.getInstructionIndex(*Where); + VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx); + if (WhereVNI && WhereVNI != FromVNI) + continue; + + // Make sure ToReg isn't clobbered before it gets there. + VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx); + if (ToVNI && ToVNI != FromVNI) + continue; + + Changed = true; + LLVM_DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " + << MI << "\n"); + O.setReg(ToReg); + + // If the store's def was previously dead, it is no longer. + if (!O.isUndef()) { + MI.getOperand(0).setIsDead(false); + + Indices.push_back(WhereIdx.getRegSlot()); + } + } + + if (Changed) { + // Extend ToReg's liveness. + LIS.extendToIndices(*ToLI, Indices); + + // Shrink FromReg's liveness. 
+ LIS.shrinkToUses(FromLI); + + // If we replaced all dominated uses, FromReg is now killed at MI. + if (!FromLI->liveAt(FromIdx.getDeadSlot())) + MI.addRegisterKilled(FromReg, MBB.getParent() + ->getSubtarget<WebAssemblySubtarget>() + .getRegisterInfo()); + } + + return Changed; +} + +static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, LiveIntervals &LIS, + const WebAssemblyTargetLowering &TLI, + const TargetLibraryInfo &LibInfo) { + MachineOperand &Op1 = MI.getOperand(1); + if (!Op1.isSymbol()) + return false; + + StringRef Name(Op1.getSymbolName()); + bool CallReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET); + if (!CallReturnsInput) + return false; + + LibFunc Func; + if (!LibInfo.getLibFunc(Name, Func)) + return false; + + Register FromReg = MI.getOperand(2).getReg(); + Register ToReg = MI.getOperand(0).getReg(); + if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) + report_fatal_error("Memory Intrinsic results: call to builtin function " + "with wrong signature, from/to mismatch"); + return replaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); +} + +bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Memory Intrinsic Results **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &MDT = getAnalysis<MachineDominatorTree>(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering(); + const auto &LibInfo = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(MF.getFunction()); + auto &LIS = getAnalysis<LiveIntervals>(); + bool Changed = false; + + // We don't preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && + "MemIntrinsicResults expects liveness tracking"); + + for (auto &MBB : MF) { + LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i64: + Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); + break; + } + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp new file mode 100644 index 000000000000..0bd30791e57c --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -0,0 +1,107 @@ +//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Optimize LiveIntervals for use in a post-RA context. +// +/// LiveIntervals normally runs before register allocation when the code is +/// only recently lowered out of SSA form, so it's uncommon for registers to +/// have multiple defs, and when they do, the defs are usually closely related. +/// Later, after coalescing, tail duplication, and other optimizations, it's +/// more common to see registers with multiple unrelated defs. 
This pass +/// updates LiveIntervals to distribute the value numbers across separate +/// LiveIntervals. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-live-intervals" + +namespace { +class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Optimize Live Intervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(LiveVariablesID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyOptimizeLiveIntervals() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyOptimizeLiveIntervals::ID = 0; +INITIALIZE_PASS(WebAssemblyOptimizeLiveIntervals, DEBUG_TYPE, + "Optimize LiveIntervals for WebAssembly", false, false) + +FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() { + return new WebAssemblyOptimizeLiveIntervals(); +} + +bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n" + "********** Function: " + << MF.getName() << '\n'); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &LIS = getAnalysis<LiveIntervals>(); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && "OptimizeLiveIntervals expects liveness"); + + // Split multiple-VN LiveIntervals into multiple LiveIntervals. + SmallVector<LiveInterval *, 4> SplitLIs; + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = Register::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) + continue; + + LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs); + SplitLIs.clear(); + } + + // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF + // instructions to satisfy LiveIntervals' requirement that all uses be + // dominated by defs. Now that LiveIntervals has computed which of these + // defs are actually needed and which are dead, remove the dead ones. 
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) { + MachineInstr *MI = &*MII++; + if (MI->isImplicitDef() && MI->getOperand(0).isDead()) { + LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg()); + LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot()); + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + } + + return false; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp new file mode 100644 index 000000000000..9b60596e42b4 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -0,0 +1,80 @@ +//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Optimize calls with "returned" attributes for WebAssembly. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-returned" + +namespace { +class OptimizeReturned final : public FunctionPass, + public InstVisitor<OptimizeReturned> { + StringRef getPassName() const override { + return "WebAssembly Optimize Returned"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool runOnFunction(Function &F) override; + + DominatorTree *DT = nullptr; + +public: + static char ID; + OptimizeReturned() : FunctionPass(ID) {} + + void visitCallSite(CallSite CS); +}; +} // End anonymous namespace + +char OptimizeReturned::ID = 0; +INITIALIZE_PASS(OptimizeReturned, DEBUG_TYPE, + "Optimize calls with \"returned\" attributes for WebAssembly", + false, false) + +FunctionPass *llvm::createWebAssemblyOptimizeReturned() { + return new OptimizeReturned(); +} + +void OptimizeReturned::visitCallSite(CallSite CS) { + for (unsigned I = 0, E = CS.getNumArgOperands(); I < E; ++I) + if (CS.paramHasAttr(I, Attribute::Returned)) { + Instruction *Inst = CS.getInstruction(); + Value *Arg = CS.getArgOperand(I); + // Ignore constants, globals, undef, etc. + if (isa<Constant>(Arg)) + continue; + // Like replaceDominatedUsesWith but using Instruction/Use dominance. 
+      Arg->replaceUsesWithIf(Inst,
+                             [&](Use &U) { return DT->dominates(Inst, U); });
+    }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+  LLVM_DEBUG(dbgs() << "********** Optimize returned Attributes **********\n"
+                       "********** Function: "
+                    << F.getName() << '\n');
+
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  visit(F);
+  return true;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 000000000000..ea6cd09a604c
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,186 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+static cl::opt<bool> DisableWebAssemblyFallthroughReturnOpt(
+    "disable-wasm-fallthrough-return-opt", cl::Hidden,
+    cl::desc("WebAssembly: Disable fallthrough-return optimizations."),
+    cl::init(false));
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly late peephole optimizer";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID;
+  WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+INITIALIZE_PASS(WebAssemblyPeephole, DEBUG_TYPE,
+                "WebAssembly peephole optimizations", false, false)
+
+FunctionPass *llvm::createWebAssemblyPeephole() {
+  return new WebAssemblyPeephole();
+}
+
+/// If desirable, rewrite NewReg to a drop register.
+static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg, + MachineOperand &MO, WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { + bool Changed = false; + if (OldReg == NewReg) { + Changed = true; + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + MO.setReg(NewReg); + MO.setIsDead(); + MFI.stackifyVReg(NewReg); + } + return Changed; +} + +static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, + const MachineFunction &MF, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo &TII) { + if (DisableWebAssemblyFallthroughReturnOpt) + return false; + if (&MBB != &MF.back()) + return false; + + MachineBasicBlock::iterator End = MBB.end(); + --End; + assert(End->getOpcode() == WebAssembly::END_FUNCTION); + --End; + if (&MI != &*End) + return false; + + for (auto &MO : MI.explicit_operands()) { + // If the operand isn't stackified, insert a COPY to read the operands and + // stackify them. + Register Reg = MO.getReg(); + if (!MFI.isVRegStackified(Reg)) { + unsigned CopyLocalOpc; + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + switch (RegClass->getID()) { + case WebAssembly::I32RegClassID: + CopyLocalOpc = WebAssembly::COPY_I32; + break; + case WebAssembly::I64RegClassID: + CopyLocalOpc = WebAssembly::COPY_I64; + break; + case WebAssembly::F32RegClassID: + CopyLocalOpc = WebAssembly::COPY_F32; + break; + case WebAssembly::F64RegClassID: + CopyLocalOpc = WebAssembly::COPY_F64; + break; + case WebAssembly::V128RegClassID: + CopyLocalOpc = WebAssembly::COPY_V128; + break; + case WebAssembly::EXNREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_EXNREF; + break; + default: + llvm_unreachable("Unexpected register class for return operand"); + } + Register NewReg = MRI.createVirtualRegister(RegClass); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) + .addReg(Reg); + MO.setReg(NewReg); + MFI.stackifyVReg(NewReg); + } + } + + MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN)); + return true; +} + +bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Peephole **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering(); + auto &LibInfo = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(MF.getFunction()); + bool Changed = false; + + for (auto &MBB : MF) + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i64: { + MachineOperand &Op1 = MI.getOperand(1); + if (Op1.isSymbol()) { + StringRef Name(Op1.getSymbolName()); + if (Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET)) { + LibFunc Func; + if (LibInfo.getLibFunc(Name, Func)) { + const auto &Op2 = MI.getOperand(2); + if (!Op2.isReg()) + report_fatal_error("Peephole: call to builtin function with " + "wrong signature, not consuming reg"); + MachineOperand &MO = MI.getOperand(0); + Register OldReg = MO.getReg(); + Register NewReg = Op2.getReg(); + + if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg)) + report_fatal_error("Peephole: call to builtin function with " + "wrong signature, from/to 
mismatch"); + Changed |= maybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI); + } + } + } + break; + } + // Optimize away an explicit void return at the end of the function. + case WebAssembly::RETURN: + Changed |= maybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII); + break; + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp new file mode 100644 index 000000000000..799b9388097c --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -0,0 +1,127 @@ +//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Fix up code to meet LiveInterval's requirements. +/// +/// Some CodeGen passes don't preserve LiveInterval's requirements, because +/// they run after register allocation and it isn't important. However, +/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code +/// to meet LiveIntervals' requirements; primarily, it ensures that all +/// virtual register uses have definitions (IMPLICIT_DEF definitions if +/// nothing else). +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-prepare-for-live-intervals" + +namespace { +class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} + +private: + StringRef getPassName() const override { + return "WebAssembly Prepare For LiveIntervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyPrepareForLiveIntervals::ID = 0; +INITIALIZE_PASS(WebAssemblyPrepareForLiveIntervals, DEBUG_TYPE, + "Fix up code for LiveIntervals", false, false) + +FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { + return new WebAssemblyPrepareForLiveIntervals(); +} + +// Test whether the given register has an ARGUMENT def. 
+static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { + for (const auto &Def : MRI.def_instructions(Reg)) + if (WebAssembly::isArgument(Def.getOpcode())) + return true; + return false; +} + +bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Prepare For LiveIntervals **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + MachineBasicBlock &Entry = *MF.begin(); + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + "LiveIntervals shouldn't be active yet!"); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF + // instructions. LiveIntervals requires that all paths to virtual register + // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to + // conservatively satisfy this. + // + // TODO: This is fairly heavy-handed; find a better approach. + // + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = Register::index2VirtReg(I); + + // Skip unused registers. + if (MRI.use_nodbg_empty(Reg)) + continue; + + // Skip registers that have an ARGUMENT definition. + if (hasArgumentDef(Reg, MRI)) + continue; + + BuildMI(Entry, Entry.begin(), DebugLoc(), + TII.get(WebAssembly::IMPLICIT_DEF), Reg); + Changed = true; + } + + // Move ARGUMENT_* instructions to the top of the entry block, so that their + // liveness reflects the fact that these really are live-in values. + for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) { + MachineInstr &MI = *MII++; + if (WebAssembly::isArgument(MI.getOpcode())) { + MI.removeFromParent(); + Entry.insert(Entry.begin(), &MI); + } + } + + // Ok, we're now ready to run the LiveIntervals analysis again. + MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp new file mode 100644 index 000000000000..043b6f1b7d18 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -0,0 +1,174 @@ +//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a virtual register coloring pass. +/// +/// WebAssembly doesn't have a fixed number of registers, but it is still +/// desirable to minimize the total number of registers used in each function. +/// +/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp. 
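The core idea of the coloring pass whose header comment closes above, distilled: visit intervals in a chosen order and greedily reuse the first "color" (register) whose already-assigned intervals don't overlap the new one. The following is a self-contained model of that loop, simplified under stated assumptions: no register classes, spill weights, or live-in constraints, all of which the real pass also checks.

```cpp
// Simplified model of greedy interval coloring (not the pass's code).
#include <utility>
#include <vector>

using Interval = std::pair<int, int>; // [start, end) in instruction slots

static bool overlaps(const Interval &A, const Interval &B) {
  return A.first < B.second && B.first < A.second;
}

// Assign each interval (already sorted by the caller) the first color whose
// previously assigned intervals it does not overlap; open a new color if none
// is free. Returns one color index per input interval.
std::vector<int> colorIntervals(const std::vector<Interval> &Sorted) {
  std::vector<int> Color(Sorted.size());
  std::vector<std::vector<Interval>> Assigned; // intervals per color
  for (size_t I = 0; I < Sorted.size(); ++I) {
    size_t C = 0;
    for (; C < Assigned.size(); ++C) {
      bool Free = true;
      for (const Interval &Other : Assigned[C])
        if (overlaps(Sorted[I], Other)) {
          Free = false;
          break;
        }
      if (Free)
        break;
    }
    if (C == Assigned.size())
      Assigned.emplace_back(); // open a new color (register)
    Assigned[C].push_back(Sorted[I]);
    Color[I] = static_cast<int>(C);
  }
  return Color;
}
```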
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-coloring" + +namespace { +class WebAssemblyRegColoring final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegColoring() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "WebAssembly Register Coloring"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyRegColoring::ID = 0; +INITIALIZE_PASS(WebAssemblyRegColoring, DEBUG_TYPE, + "Minimize number of registers used", false, false) + +FunctionPass *llvm::createWebAssemblyRegColoring() { + return new WebAssemblyRegColoring(); +} + +// Compute the total spill weight for VReg. +static float computeWeight(const MachineRegisterInfo *MRI, + const MachineBlockFrequencyInfo *MBFI, + unsigned VReg) { + float Weight = 0.0f; + for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg)) + Weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI, + *MO.getParent()); + return Weight; +} + +bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Register Coloring **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual + // registers could be modified before the longjmp is executed, resulting in + // the wrong value being used afterwards. (See <rdar://problem/8007500>.) + // TODO: Does WebAssembly need to care about setjmp for register coloring? + if (MF.exposesReturnsTwice()) + return false; + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + LiveIntervals *Liveness = &getAnalysis<LiveIntervals>(); + const MachineBlockFrequencyInfo *MBFI = + &getAnalysis<MachineBlockFrequencyInfo>(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + + // Gather all register intervals into a list and sort them. + unsigned NumVRegs = MRI->getNumVirtRegs(); + SmallVector<LiveInterval *, 0> SortedIntervals; + SortedIntervals.reserve(NumVRegs); + + LLVM_DEBUG(dbgs() << "Interesting register intervals:\n"); + for (unsigned I = 0; I < NumVRegs; ++I) { + unsigned VReg = Register::index2VirtReg(I); + if (MFI.isVRegStackified(VReg)) + continue; + // Skip unused registers, which can use $drop. + if (MRI->use_empty(VReg)) + continue; + + LiveInterval *LI = &Liveness->getInterval(VReg); + assert(LI->weight == 0.0f); + LI->weight = computeWeight(MRI, MBFI, VReg); + LLVM_DEBUG(LI->dump()); + SortedIntervals.push_back(LI); + } + LLVM_DEBUG(dbgs() << '\n'); + + // Sort them to put arguments first (since we don't want to rename live-in + // registers), by weight next, and then by position. + // TODO: Investigate more intelligent sorting heuristics. 
For starters, we + // should try to coalesce adjacent live intervals before non-adjacent ones. + llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) { + if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg)) + return MRI->isLiveIn(LHS->reg); + if (LHS->weight != RHS->weight) + return LHS->weight > RHS->weight; + if (LHS->empty() || RHS->empty()) + return !LHS->empty() && RHS->empty(); + return *LHS < *RHS; + }); + + LLVM_DEBUG(dbgs() << "Coloring register intervals:\n"); + SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u); + SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments( + SortedIntervals.size()); + BitVector UsedColors(SortedIntervals.size()); + bool Changed = false; + for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { + LiveInterval *LI = SortedIntervals[I]; + unsigned Old = LI->reg; + size_t Color = I; + const TargetRegisterClass *RC = MRI->getRegClass(Old); + + // Check if it's possible to reuse any of the used colors. + if (!MRI->isLiveIn(Old)) + for (unsigned C : UsedColors.set_bits()) { + if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) + continue; + for (LiveInterval *OtherLI : Assignments[C]) + if (!OtherLI->empty() && OtherLI->overlaps(*LI)) + goto continue_outer; + Color = C; + break; + continue_outer:; + } + + unsigned New = SortedIntervals[Color]->reg; + SlotMapping[I] = New; + Changed |= Old != New; + UsedColors.set(Color); + Assignments[Color].push_back(LI); + LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) + << " to vreg" << Register::virtReg2Index(New) << "\n"); + } + if (!Changed) + return false; + + // Rewrite register operands. + for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) { + unsigned Old = SortedIntervals[I]->reg; + unsigned New = SlotMapping[I]; + if (Old != New) + MRI->replaceRegWith(Old, New); + } + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp new file mode 100644 index 000000000000..72e7a7cf5042 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -0,0 +1,110 @@ +//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a pass which assigns WebAssembly register +/// numbers for CodeGen virtual registers. 
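+/// As an illustrative example: in a function taking two i32 parameters, the
+/// ARGUMENT vregs receive WAReg numbers 0 and 1, and the remaining
+/// non-stackified vregs are numbered upward starting from 2.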
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-numbering" + +namespace { +class WebAssemblyRegNumbering final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Register Numbering"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegNumbering() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyRegNumbering::ID = 0; +INITIALIZE_PASS(WebAssemblyRegNumbering, DEBUG_TYPE, + "Assigns WebAssembly register numbers for virtual registers", + false, false) + +FunctionPass *llvm::createWebAssemblyRegNumbering() { + return new WebAssemblyRegNumbering(); +} + +bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Register Numbering **********\n" + "********** Function: " + << MF.getName() << '\n'); + + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + MFI.initWARegs(); + + // WebAssembly argument registers are in the same index space as local + // variables. Assign the numbers for them first. + MachineBasicBlock &EntryMBB = MF.front(); + for (MachineInstr &MI : EntryMBB) { + if (!WebAssembly::isArgument(MI.getOpcode())) + break; + + int64_t Imm = MI.getOperand(1).getImm(); + LLVM_DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() + << " -> WAReg " << Imm << "\n"); + MFI.setWAReg(MI.getOperand(0).getReg(), Imm); + } + + // Then assign regular WebAssembly registers for all remaining used + // virtual registers. TODO: Consider sorting the registers by frequency of + // use, to maximize usage of small immediate fields. + unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs(); + unsigned NumStackRegs = 0; + // Start the numbering for locals after the arg regs + unsigned CurReg = MFI.getParams().size(); + for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) { + unsigned VReg = Register::index2VirtReg(VRegIdx); + // Skip unused registers. + if (MRI.use_empty(VReg)) + continue; + // Handle stackified registers. 
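+    // (As encoded below, stackified vregs don't become locals; they are
+    // tagged by setting the high bit of their WAReg number, INT32_MIN | N,
+    // which keeps the two index spaces distinguishable.)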
+ if (MFI.isVRegStackified(VReg)) { + LLVM_DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " + << (INT32_MIN | NumStackRegs) << "\n"); + MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++); + continue; + } + if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) { + LLVM_DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " << CurReg << "\n"); + MFI.setWAReg(VReg, CurReg++); + } + } + + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp new file mode 100644 index 000000000000..421d353a89e8 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -0,0 +1,937 @@ +//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a register stacking pass. +/// +/// This pass reorders instructions to put register uses and defs in an order +/// such that they form single-use expression trees. Registers fitting this form +/// are then marked as "stackified", meaning references to them are replaced by +/// "push" and "pop" from the value stack. +/// +/// This is primarily a code size optimization, since temporary values on the +/// value stack don't need to be named. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" +#include "WebAssemblyDebugValueManager.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-stackify" + +namespace { +class WebAssemblyRegStackify final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Register Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(LiveVariablesID); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyRegStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyRegStackify::ID = 0; +INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE, + "Reorder instructions to use the WebAssembly value stack", + false, false) + +FunctionPass *llvm::createWebAssemblyRegStackify() { + return new WebAssemblyRegStackify(); +} + +// Decorate 
the given instruction with implicit operands that enforce the +// expression stack ordering constraints for an instruction which is on +// the expression stack. +static void imposeStackOrdering(MachineInstr *MI) { + // Write the opaque VALUE_STACK register. + if (!MI->definesRegister(WebAssembly::VALUE_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK, + /*isDef=*/true, + /*isImp=*/true)); + + // Also read the opaque VALUE_STACK register. + if (!MI->readsRegister(WebAssembly::VALUE_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK, + /*isDef=*/false, + /*isImp=*/true)); +} + +// Convert an IMPLICIT_DEF instruction into an instruction which defines +// a constant zero value. +static void convertImplicitDefToConstZero(MachineInstr *MI, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, + MachineFunction &MF, + LiveIntervals &LIS) { + assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF); + + const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg()); + if (RegClass == &WebAssembly::I32RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_I32)); + MI->addOperand(MachineOperand::CreateImm(0)); + } else if (RegClass == &WebAssembly::I64RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_I64)); + MI->addOperand(MachineOperand::CreateImm(0)); + } else if (RegClass == &WebAssembly::F32RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_F32)); + auto *Val = cast<ConstantFP>(Constant::getNullValue( + Type::getFloatTy(MF.getFunction().getContext()))); + MI->addOperand(MachineOperand::CreateFPImm(Val)); + } else if (RegClass == &WebAssembly::F64RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_F64)); + auto *Val = cast<ConstantFP>(Constant::getNullValue( + Type::getDoubleTy(MF.getFunction().getContext()))); + MI->addOperand(MachineOperand::CreateFPImm(Val)); + } else if (RegClass == &WebAssembly::V128RegClass) { + Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); + MI->addOperand(MachineOperand::CreateReg(TempReg, false)); + MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(WebAssembly::CONST_I32), TempReg) + .addImm(0); + LIS.InsertMachineInstrInMaps(*Const); + } else { + llvm_unreachable("Unexpected reg class"); + } +} + +// Determine whether a call to the callee referenced by +// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side +// effects. +static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read, + bool &Write, bool &Effects, bool &StackPointer) { + // All calls can use the stack pointer. + StackPointer = true; + + const MachineOperand &MO = MI.getOperand(CalleeOpNo); + if (MO.isGlobal()) { + const Constant *GV = MO.getGlobal(); + if (const auto *GA = dyn_cast<GlobalAlias>(GV)) + if (!GA->isInterposable()) + GV = GA->getAliasee(); + + if (const auto *F = dyn_cast<Function>(GV)) { + if (!F->doesNotThrow()) + Effects = true; + if (F->doesNotAccessMemory()) + return; + if (F->onlyReadsMemory()) { + Read = true; + return; + } + } + } + + // Assume the worst. + Write = true; + Read = true; + Effects = true; +} + +// Determine whether MI reads memory, writes memory, has side effects, +// and/or uses the stack pointer value. +static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, + bool &Write, bool &Effects, bool &StackPointer) { + assert(!MI.isTerminator()); + + if (MI.isDebugInstr() || MI.isPosition()) + return; + + // Check for loads. 
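+  // (A dereferenceable, invariant load, e.g. one from a constant pool, can
+  // be freely reordered, so it is not counted as a read here.)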
+  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
+    Read = true;
+
+  // Check for stores.
+  if (MI.mayStore()) {
+    Write = true;
+  } else if (MI.hasOrderedMemoryRef()) {
+    switch (MI.getOpcode()) {
+    case WebAssembly::DIV_S_I32:
+    case WebAssembly::DIV_S_I64:
+    case WebAssembly::REM_S_I32:
+    case WebAssembly::REM_S_I64:
+    case WebAssembly::DIV_U_I32:
+    case WebAssembly::DIV_U_I64:
+    case WebAssembly::REM_U_I32:
+    case WebAssembly::REM_U_I64:
+    case WebAssembly::I32_TRUNC_S_F32:
+    case WebAssembly::I64_TRUNC_S_F32:
+    case WebAssembly::I32_TRUNC_S_F64:
+    case WebAssembly::I64_TRUNC_S_F64:
+    case WebAssembly::I32_TRUNC_U_F32:
+    case WebAssembly::I64_TRUNC_U_F32:
+    case WebAssembly::I32_TRUNC_U_F64:
+    case WebAssembly::I64_TRUNC_U_F64:
+      // These instructions have hasUnmodeledSideEffects() returning true
+      // because they trap on overflow and invalid input, so they can't be
+      // arbitrarily moved; however, hasOrderedMemoryRef() interprets this
+      // plus their lack of memoperands as having a potential unknown memory
+      // reference.
+      break;
+    default:
+      // Record volatile accesses, unless it's a call, as calls are handled
+      // specially below.
+      if (!MI.isCall()) {
+        Write = true;
+        Effects = true;
+      }
+      break;
+    }
+  }
+
+  // Check for side effects.
+  if (MI.hasUnmodeledSideEffects()) {
+    switch (MI.getOpcode()) {
+    case WebAssembly::DIV_S_I32:
+    case WebAssembly::DIV_S_I64:
+    case WebAssembly::REM_S_I32:
+    case WebAssembly::REM_S_I64:
+    case WebAssembly::DIV_U_I32:
+    case WebAssembly::DIV_U_I64:
+    case WebAssembly::REM_U_I32:
+    case WebAssembly::REM_U_I64:
+    case WebAssembly::I32_TRUNC_S_F32:
+    case WebAssembly::I64_TRUNC_S_F32:
+    case WebAssembly::I32_TRUNC_S_F64:
+    case WebAssembly::I64_TRUNC_S_F64:
+    case WebAssembly::I32_TRUNC_U_F32:
+    case WebAssembly::I64_TRUNC_U_F32:
+    case WebAssembly::I32_TRUNC_U_F64:
+    case WebAssembly::I64_TRUNC_U_F64:
+      // These instructions have hasUnmodeledSideEffects() returning true
+      // because they trap on overflow and invalid input, so they can't be
+      // arbitrarily moved; however, in the specific case of register
+      // stackifying, it is safe to move them because overflow and invalid
+      // input are undefined behavior.
+      break;
+    default:
+      Effects = true;
+      break;
+    }
+  }
+
+  // Check for writes to __stack_pointer global.
+  if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+      strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
+    StackPointer = true;
+
+  // Analyze calls.
+  if (MI.isCall()) {
+    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
+    queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
+  }
+}
+
+// Test whether Def is safe and profitable to rematerialize.
+static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
+                                const WebAssemblyInstrInfo *TII) {
+  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
+}
+
+// Identify the definition for this register at this point. This is a
+// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
+// LiveIntervals to handle complex cases.
+static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
+                                const MachineRegisterInfo &MRI,
+                                const LiveIntervals &LIS) {
+  // Most registers are in SSA form here so we try a quick MRI query first.
+  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
+    return Def;
+
+  // MRI doesn't know what the Def is. Try asking LIS.
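+  // (The def reaching Insert is the value number live immediately before
+  // Insert's slot index, which is what getVNInfoBefore looks up.)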
+ if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore( + LIS.getInstructionIndex(*Insert))) + return LIS.getInstructionFromIndex(ValNo->def); + + return nullptr; +} + +// Test whether Reg, as defined at Def, has exactly one use. This is a +// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals +// to handle complex cases. +static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, LiveIntervals &LIS) { + // Most registers are in SSA form here so we try a quick MRI query first. + if (MRI.hasOneUse(Reg)) + return true; + + bool HasOne = false; + const LiveInterval &LI = LIS.getInterval(Reg); + const VNInfo *DefVNI = + LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()); + assert(DefVNI); + for (auto &I : MRI.use_nodbg_operands(Reg)) { + const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent())); + if (Result.valueIn() == DefVNI) { + if (!Result.isKill()) + return false; + if (HasOne) + return false; + HasOne = true; + } + } + return HasOne; +} + +// Test whether it's safe to move Def to just before Insert. +// TODO: Compute memory dependencies in a way that doesn't require always +// walking the block. +// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be +// more precise. +static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, + AliasAnalysis &AA, const MachineRegisterInfo &MRI) { + assert(Def->getParent() == Insert->getParent()); + + // 'catch' and 'extract_exception' should be the first instruction of a BB and + // cannot move. + if (Def->getOpcode() == WebAssembly::CATCH || + Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) { + const MachineBasicBlock *MBB = Def->getParent(); + auto NextI = std::next(MachineBasicBlock::const_iterator(Def)); + for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI) + ; + if (NextI != Insert) + return false; + } + + // Check for register dependencies. + SmallVector<unsigned, 4> MutableRegisters; + for (const MachineOperand &MO : Def->operands()) { + if (!MO.isReg() || MO.isUndef()) + continue; + Register Reg = MO.getReg(); + + // If the register is dead here and at Insert, ignore it. + if (MO.isDead() && Insert->definesRegister(Reg) && + !Insert->readsRegister(Reg)) + continue; + + if (Register::isPhysicalRegister(Reg)) { + // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions + // from moving down, and we've already checked for that. + if (Reg == WebAssembly::ARGUMENTS) + continue; + // If the physical register is never modified, ignore it. + if (!MRI.isPhysRegModified(Reg)) + continue; + // Otherwise, it's a physical register with unknown liveness. + return false; + } + + // If one of the operands isn't in SSA form, it has different values at + // different times, and we need to make sure we don't move our use across + // a different def. + if (!MO.isDef() && !MRI.hasOneDef(Reg)) + MutableRegisters.push_back(Reg); + } + + bool Read = false, Write = false, Effects = false, StackPointer = false; + query(*Def, AA, Read, Write, Effects, StackPointer); + + // If the instruction does not access memory and has no side effects, it has + // no additional dependencies. + bool HasMutableRegisters = !MutableRegisters.empty(); + if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters) + return true; + + // Scan through the intervening instructions between Def and Insert. 
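+  // (Informally: an effectful Def may not cross another effectful
+  // instruction, a read may not cross a write, a write may not cross a read
+  // or another write, and a stack-pointer access may not cross another
+  // stack-pointer access.)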
+ MachineBasicBlock::const_iterator D(Def), I(Insert); + for (--I; I != D; --I) { + bool InterveningRead = false; + bool InterveningWrite = false; + bool InterveningEffects = false; + bool InterveningStackPointer = false; + query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects, + InterveningStackPointer); + if (Effects && InterveningEffects) + return false; + if (Read && InterveningWrite) + return false; + if (Write && (InterveningRead || InterveningWrite)) + return false; + if (StackPointer && InterveningStackPointer) + return false; + + for (unsigned Reg : MutableRegisters) + for (const MachineOperand &MO : I->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) + return false; + } + + return true; +} + +/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses. +static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, + const MachineBasicBlock &MBB, + const MachineRegisterInfo &MRI, + const MachineDominatorTree &MDT, + LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI) { + const LiveInterval &LI = LIS.getInterval(Reg); + + const MachineInstr *OneUseInst = OneUse.getParent(); + VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst)); + + for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) { + if (&Use == &OneUse) + continue; + + const MachineInstr *UseInst = Use.getParent(); + VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst)); + + if (UseVNI != OneUseVNI) + continue; + + if (UseInst == OneUseInst) { + // Another use in the same instruction. We need to ensure that the one + // selected use happens "before" it. + if (&OneUse > &Use) + return false; + } else { + // Test that the use is dominated by the one selected use. + while (!MDT.dominates(OneUseInst, UseInst)) { + // Actually, dominating is over-conservative. Test that the use would + // happen after the one selected use in the stack evaluation order. + // + // This is needed as a consequence of using implicit local.gets for + // uses and implicit local.sets for defs. + if (UseInst->getDesc().getNumDefs() == 0) + return false; + const MachineOperand &MO = UseInst->getOperand(0); + if (!MO.isReg()) + return false; + Register DefReg = MO.getReg(); + if (!Register::isVirtualRegister(DefReg) || + !MFI.isVRegStackified(DefReg)) + return false; + assert(MRI.hasOneNonDBGUse(DefReg)); + const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg); + const MachineInstr *NewUseInst = NewUse.getParent(); + if (NewUseInst == OneUseInst) { + if (&OneUse > &NewUse) + return false; + break; + } + UseInst = NewUseInst; + } + } + } + return true; +} + +/// Get the appropriate tee opcode for the given register class. +static unsigned getTeeOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::TEE_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::TEE_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::TEE_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::TEE_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::TEE_V128; + llvm_unreachable("Unexpected register class"); +} + +// Shrink LI to its uses, cleaning up LI. 
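+// (If shrinking leaves LI in disconnected components, they are split into
+// separate intervals, which is what splitSeparateComponents does.)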
+static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) { + if (LIS.shrinkToUses(&LI)) { + SmallVector<LiveInterval *, 4> SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + } +} + +/// A single-use def in the same block with no intervening memory or register +/// dependencies; move the def down and nest it with the current instruction. +static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op, + MachineInstr *Def, MachineBasicBlock &MBB, + MachineInstr *Insert, LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { + LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump()); + + WebAssemblyDebugValueManager DefDIs(Def); + MBB.splice(Insert, &MBB, Def); + DefDIs.move(Insert); + LIS.handleMove(*Def); + + if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) { + // No one else is using this register for anything so we can just stackify + // it in place. + MFI.stackifyVReg(Reg); + } else { + // The register may have unrelated uses or defs; create a new register for + // just our one def and use so that we can stackify it. + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Def->getOperand(0).setReg(NewReg); + Op.setReg(NewReg); + + // Tell LiveIntervals about the new register. + LIS.createAndComputeVirtRegInterval(NewReg); + + // Tell LiveIntervals about the changes to the old register. + LiveInterval &LI = LIS.getInterval(Reg); + LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(), + LIS.getInstructionIndex(*Op.getParent()).getRegSlot(), + /*RemoveDeadValNo=*/true); + + MFI.stackifyVReg(NewReg); + + DefDIs.updateReg(NewReg); + + LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump()); + } + + imposeStackOrdering(Def); + return Def; +} + +/// A trivially cloneable instruction; clone it and nest the new copy with the +/// current instruction. +static MachineInstr *rematerializeCheapDef( + unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) { + LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump()); + LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump()); + + WebAssemblyDebugValueManager DefDIs(&Def); + + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI); + Op.setReg(NewReg); + MachineInstr *Clone = &*std::prev(Insert); + LIS.InsertMachineInstrInMaps(*Clone); + LIS.createAndComputeVirtRegInterval(NewReg); + MFI.stackifyVReg(NewReg); + imposeStackOrdering(Clone); + + LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump()); + + // Shrink the interval. + bool IsDead = MRI.use_empty(Reg); + if (!IsDead) { + LiveInterval &LI = LIS.getInterval(Reg); + shrinkToUses(LI, LIS); + IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot()); + } + + // If that was the last use of the original, delete the original. + // Move or clone corresponding DBG_VALUEs to the 'Insert' location. 
+ if (IsDead) { + LLVM_DEBUG(dbgs() << " - Deleting original\n"); + SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot(); + LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx); + LIS.removeInterval(Reg); + LIS.RemoveMachineInstrFromMaps(Def); + Def.eraseFromParent(); + + DefDIs.move(&*Insert); + DefDIs.updateReg(NewReg); + } else { + DefDIs.clone(&*Insert, NewReg); + } + + return Clone; +} + +/// A multiple-use def in the same block with no intervening memory or register +/// dependencies; move the def down, nest it with the current instruction, and +/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite +/// this: +/// +/// Reg = INST ... // Def +/// INST ..., Reg, ... // Insert +/// INST ..., Reg, ... +/// INST ..., Reg, ... +/// +/// to this: +/// +/// DefReg = INST ... // Def (to become the new Insert) +/// TeeReg, Reg = TEE_... DefReg +/// INST ..., TeeReg, ... // Insert +/// INST ..., Reg, ... +/// INST ..., Reg, ... +/// +/// with DefReg and TeeReg stackified. This eliminates a local.get from the +/// resulting code. +static MachineInstr *moveAndTeeForMultiUse( + unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB, + MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) { + LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump()); + + WebAssemblyDebugValueManager DefDIs(Def); + + // Move Def into place. + MBB.splice(Insert, &MBB, Def); + LIS.handleMove(*Def); + + // Create the Tee and attach the registers. + const auto *RegClass = MRI.getRegClass(Reg); + Register TeeReg = MRI.createVirtualRegister(RegClass); + Register DefReg = MRI.createVirtualRegister(RegClass); + MachineOperand &DefMO = Def->getOperand(0); + MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), + TII->get(getTeeOpcode(RegClass)), TeeReg) + .addReg(Reg, RegState::Define) + .addReg(DefReg, getUndefRegState(DefMO.isDead())); + Op.setReg(TeeReg); + DefMO.setReg(DefReg); + SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot(); + SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot(); + + DefDIs.move(Insert); + + // Tell LiveIntervals we moved the original vreg def from Def to Tee. + LiveInterval &LI = LIS.getInterval(Reg); + LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx); + VNInfo *ValNo = LI.getVNInfoAt(DefIdx); + I->start = TeeIdx; + ValNo->def = TeeIdx; + shrinkToUses(LI, LIS); + + // Finish stackifying the new regs. + LIS.createAndComputeVirtRegInterval(TeeReg); + LIS.createAndComputeVirtRegInterval(DefReg); + MFI.stackifyVReg(DefReg); + MFI.stackifyVReg(TeeReg); + imposeStackOrdering(Def); + imposeStackOrdering(Tee); + + DefDIs.clone(Tee, DefReg); + DefDIs.clone(Insert, TeeReg); + + LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump()); + LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump()); + return Def; +} + +namespace { +/// A stack for walking the tree of instructions being built, visiting the +/// MachineOperands in DFS order. 
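+/// For an illustrative instruction "t = add x, y", the walker visits y first
+/// and then x, matching the LIFO order in which the wasm value stack would
+/// pop the operands.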
+class TreeWalkerState {
+  using mop_iterator = MachineInstr::mop_iterator;
+  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
+  using RangeTy = iterator_range<mop_reverse_iterator>;
+  SmallVector<RangeTy, 4> Worklist;
+
+public:
+  explicit TreeWalkerState(MachineInstr *Insert) {
+    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
+    if (Range.begin() != Range.end())
+      Worklist.push_back(reverse(Range));
+  }
+
+  bool done() const { return Worklist.empty(); }
+
+  MachineOperand &pop() {
+    RangeTy &Range = Worklist.back();
+    MachineOperand &Op = *Range.begin();
+    Range = drop_begin(Range, 1);
+    if (Range.begin() == Range.end())
+      Worklist.pop_back();
+    assert((Worklist.empty() ||
+            Worklist.back().begin() != Worklist.back().end()) &&
+           "Empty ranges shouldn't remain in the worklist");
+    return Op;
+  }
+
+  /// Push Instr's operands onto the stack to be visited.
+  void pushOperands(MachineInstr *Instr) {
+    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
+    if (Range.begin() != Range.end())
+      Worklist.push_back(reverse(Range));
+  }
+
+  /// Some of Instr's operands are on the top of the stack; remove them and
+  /// re-insert them starting from the beginning (because we've commuted them).
+  void resetTopOperands(MachineInstr *Instr) {
+    assert(hasRemainingOperands(Instr) &&
+           "Resetting operands should only be done when the instruction has "
+           "an operand still on the stack");
+    Worklist.back() = reverse(Instr->explicit_uses());
+  }
+
+  /// Test whether Instr has operands remaining to be visited at the top of
+  /// the stack.
+  bool hasRemainingOperands(const MachineInstr *Instr) const {
+    if (Worklist.empty())
+      return false;
+    const RangeTy &Range = Worklist.back();
+    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
+  }
+
+  /// Test whether the given register is present on the stack, indicating an
+  /// operand in the tree that we haven't visited yet. Moving a definition of
+  /// Reg to a point in the tree after that would change its value.
+  ///
+  /// This is needed as a consequence of using implicit local.gets for
+  /// uses and implicit local.sets for defs.
+  bool isOnStack(unsigned Reg) const {
+    for (const RangeTy &Range : Worklist)
+      for (const MachineOperand &MO : Range)
+        if (MO.isReg() && MO.getReg() == Reg)
+          return true;
+    return false;
+  }
+};
+
+/// State to keep track of whether commuting is in flight or whether it's been
+/// tried for the current instruction and didn't work.
+class CommutingState {
+  /// There are effectively three states: the initial state where we haven't
+  /// started commuting anything and we don't know anything yet, the tentative
+  /// state where we've commuted the operands of the current instruction and are
+  /// revisiting it, and the declined state where we've reverted the operands
+  /// back to their original order and will no longer commute it further.
+  bool TentativelyCommuting = false;
+  bool Declined = false;
+
+  /// During the tentative state, these hold the operand indices of the commuted
+  /// operands.
+  unsigned Operand0, Operand1;
+
+public:
+  /// Stackification for an operand was not successful due to ordering
+  /// constraints. If possible, and if we haven't already tried it and declined
+  /// it, commute Insert's operands and prepare to revisit it.
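+  /// For example, with a commutable i32.add: on the first failure the two
+  /// operands are swapped and the instruction is revisited; if that fails
+  /// too, they are swapped back and no further commuting is attempted.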
+ void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker, + const WebAssemblyInstrInfo *TII) { + if (TentativelyCommuting) { + assert(!Declined && + "Don't decline commuting until you've finished trying it"); + // Commuting didn't help. Revert it. + TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TentativelyCommuting = false; + Declined = true; + } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) { + Operand0 = TargetInstrInfo::CommuteAnyOperandIndex; + Operand1 = TargetInstrInfo::CommuteAnyOperandIndex; + if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) { + // Tentatively commute the operands and try again. + TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TreeWalker.resetTopOperands(Insert); + TentativelyCommuting = true; + Declined = false; + } + } + } + + /// Stackification for some operand was successful. Reset to the default + /// state. + void reset() { + TentativelyCommuting = false; + Declined = false; + } +}; +} // end anonymous namespace + +bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto &MDT = getAnalysis<MachineDominatorTree>(); + auto &LIS = getAnalysis<LiveIntervals>(); + + // Walk the instructions from the bottom up. Currently we don't look past + // block boundaries, and the blocks aren't ordered so the block visitation + // order isn't significant, but we may want to change this in the future. + for (MachineBasicBlock &MBB : MF) { + // Don't use a range-based for loop, because we modify the list as we're + // iterating over it and the end iterator may change. + for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) { + MachineInstr *Insert = &*MII; + // Don't nest anything inside an inline asm, because we don't have + // constraints for $push inputs. + if (Insert->isInlineAsm()) + continue; + + // Ignore debugging intrinsics. + if (Insert->isDebugValue()) + continue; + + // Iterate through the inputs in reverse order, since we'll be pulling + // operands off the stack in LIFO order. + CommutingState Commuting; + TreeWalkerState TreeWalker(Insert); + while (!TreeWalker.done()) { + MachineOperand &Op = TreeWalker.pop(); + + // We're only interested in explicit virtual register operands. + if (!Op.isReg()) + continue; + + Register Reg = Op.getReg(); + assert(Op.isUse() && "explicit_uses() should only iterate over uses"); + assert(!Op.isImplicit() && + "explicit_uses() should only iterate over explicit operands"); + if (Register::isPhysicalRegister(Reg)) + continue; + + // Identify the definition for this register at this point. + MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS); + if (!Def) + continue; + + // Don't nest an INLINE_ASM def into anything, because we don't have + // constraints for $pop outputs. + if (Def->isInlineAsm()) + continue; + + // Argument instructions represent live-in registers and not real + // instructions. 
+        if (WebAssembly::isArgument(Def->getOpcode()))
+          continue;
+
+        // Currently catch's return value register cannot be stackified,
+        // because the wasm LLVM backend currently does not support live-in
+        // values entering blocks, which is part of the multi-value proposal.
+        //
+        // Once we support live-in values of wasm blocks, this can be:
+        // catch                      ; push exnref value onto stack
+        // block exnref -> i32
+        // br_on_exn $__cpp_exception ; pop the exnref value
+        // end_block
+        //
+        // But because we don't support it yet, the catch instruction's dst
+        // register should be assigned to a local so it can be propagated
+        // across the 'block' boundary for now.
+        //
+        // TODO: Fix this once we support the multi-value proposal.
+        if (Def->getOpcode() == WebAssembly::CATCH)
+          continue;
+
+        // Decide which strategy to take. Prefer to move a single-use value
+        // over cloning it, and prefer cloning over introducing a tee.
+        // For moving, we require the def to be in the same block as the use;
+        // this makes things simpler (LiveIntervals' handleMove function only
+        // supports intra-block moves) and it's MachineSink's job to catch all
+        // the sinking opportunities anyway.
+        bool SameBlock = Def->getParent() == &MBB;
+        bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) &&
+                       !TreeWalker.isOnStack(Reg);
+        if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) {
+          Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
+        } else if (shouldRematerialize(*Def, AA, TII)) {
+          Insert =
+              rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
+                                    LIS, MFI, MRI, TII, TRI);
+        } else if (CanMove &&
+                   oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
+          Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
+                                         MRI, TII);
+        } else {
+          // We failed to stackify the operand. If the problem was ordering
+          // constraints, Commuting may be able to help.
+          if (!CanMove && SameBlock)
+            Commuting.maybeCommute(Insert, TreeWalker, TII);
+          // Proceed to the next operand.
+          continue;
+        }
+
+        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
+        // to a constant 0 so that the def is explicit, and the push/pop
+        // correspondence is maintained.
+        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+          convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
+
+        // We stackified an operand. Add the defining instruction's operands to
+        // the worklist stack now to continue to build an ever deeper tree.
+        Commuting.reset();
+        TreeWalker.pushOperands(Insert);
+      }
+
+      // If we stackified any operands, skip over the tree to start looking for
+      // the next instruction we can build a tree on.
+      if (Insert != &*MII) {
+        imposeStackOrdering(&*MII);
+        MII = MachineBasicBlock::iterator(Insert).getReverse();
+        Changed = true;
+      }
+    }
+  }
+
+  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
+  // that it never looks like a use-before-def.
+  if (Changed) {
+    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
+    for (MachineBasicBlock &MBB : MF)
+      MBB.addLiveIn(WebAssembly::VALUE_STACK);
+  }
+
+#ifndef NDEBUG
+  // Verify that pushes and pops are performed in LIFO order.
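+  // (For example, if %a is pushed and then %b is pushed, %b must be popped
+  // before %a; the simulated Stack below checks exactly this pairing.)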
+ SmallVector<unsigned, 0> Stack; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.isDebugInstr()) + continue; + for (MachineOperand &MO : reverse(MI.explicit_operands())) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + + if (MFI.isVRegStackified(Reg)) { + if (MO.isDef()) + Stack.push_back(Reg); + else + assert(Stack.pop_back_val() == Reg && + "Register stack pop should be paired with a push"); + } + } + } + // TODO: Generalize this code to support keeping values on the stack across + // basic block boundaries. + assert(Stack.empty() && + "Register stack pushes and pops should be balanced"); + } +#endif + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp new file mode 100644 index 000000000000..789a025794ea --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -0,0 +1,150 @@ +//===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the WebAssembly implementation of the +/// TargetRegisterInfo class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyRegisterInfo.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyFrameLowering.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-reg-info" + +#define GET_REGINFO_TARGET_DESC +#include "WebAssemblyGenRegisterInfo.inc" + +WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) + : WebAssemblyGenRegisterInfo(0), TT(TT) {} + +const MCPhysReg * +WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { + static const MCPhysReg CalleeSavedRegs[] = {0}; + return CalleeSavedRegs; +} + +BitVector +WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const { + BitVector Reserved(getNumRegs()); + for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32, + WebAssembly::FP64}) + Reserved.set(Reg); + return Reserved; +} + +void WebAssemblyRegisterInfo::eliminateFrameIndex( + MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, + RegScavenger * /*RS*/) const { + assert(SPAdj == 0); + MachineInstr &MI = *II; + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + + assert(MFI.getObjectSize(FrameIndex) != 0 && + "We assume that variable-sized objects have already been lowered, " + "and don't use FrameIndex operands."); + Register FrameRegister = getFrameRegister(MF); + + // If this is the address operand of a load 
or store, make it relative to SP + // and fold the frame offset directly in. + unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::addr); + if (AddrOperandNum == FIOperandNum) { + unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::off); + assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); + int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; + + if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { + MI.getOperand(OffsetOperandNum).setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameRegister, /*isDef=*/false); + return; + } + } + + // If this is an address being added to a constant, fold the frame offset + // into the constant. + if (MI.getOpcode() == WebAssembly::ADD_I32) { + MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); + if (OtherMO.isReg()) { + Register OtherMOReg = OtherMO.getReg(); + if (Register::isVirtualRegister(OtherMOReg)) { + MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); + // TODO: For now we just opportunistically do this in the case where + // the CONST_I32 happens to have exactly one def and one use. We + // should generalize this to optimize in more cases. + if (Def && Def->getOpcode() == WebAssembly::CONST_I32 && + MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) { + MachineOperand &ImmMO = Def->getOperand(1); + ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameRegister, /*isDef=*/false); + return; + } + } + } + } + + // Otherwise create an i32.add SP, offset and make it the operand. + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + unsigned FIRegOperand = FrameRegister; + if (FrameOffset) { + // Create i32.add SP, offset and make it the operand. 
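+    // Schematically, the emitted code is:
+    //   OffsetOp     = CONST_I32 FrameOffset
+    //   FIRegOperand = ADD_I32 FrameRegister, OffsetOp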
+    const TargetRegisterClass *PtrRC =
+        MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+    Register OffsetOp = MRI.createVirtualRegister(PtrRC);
+    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+            OffsetOp)
+        .addImm(FrameOffset);
+    FIRegOperand = MRI.createVirtualRegister(PtrRC);
+    BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+            FIRegOperand)
+        .addReg(FrameRegister)
+        .addReg(OffsetOp);
+  }
+  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false);
+}
+
+Register
+WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  static const unsigned Regs[2][2] = {
+      /*            !isArch64Bit       isArch64Bit      */
+      /* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64},
+      /*  hasFP */ {WebAssembly::FP32, WebAssembly::FP64}};
+  const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
+  return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
+}
+
+const TargetRegisterClass *
+WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                            unsigned Kind) const {
+  assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+    return &WebAssembly::I64RegClass;
+  return &WebAssembly::I32RegClass;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
new file mode 100644
index 000000000000..7880eb217dbf
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -0,0 +1,53 @@
+// WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the WebAssembly implementation of the
+/// TargetRegisterInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+
+#define GET_REGINFO_HEADER
+#include "WebAssemblyGenRegisterInfo.inc"
+
+namespace llvm {
+
+class MachineFunction;
+class RegScavenger;
+class TargetRegisterClass;
+class Triple;
+
+class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo {
+  const Triple &TT;
+
+public:
+  explicit WebAssemblyRegisterInfo(const Triple &TT);
+
+  // Code Generation virtual methods.
+  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+  BitVector getReservedRegs(const MachineFunction &MF) const override;
+  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+                           unsigned FIOperandNum,
+                           RegScavenger *RS = nullptr) const override;
+
+  // Debug information queries.
+  Register getFrameRegister(const MachineFunction &MF) const override;
+
+  const TargetRegisterClass *
+  getPointerRegClass(const MachineFunction &MF,
+                     unsigned Kind = 0) const override;
+  // This does not apply to wasm.
+  const uint32_t *getNoPreservedMask() const override { return nullptr; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
new file mode 100644
index 000000000000..6d3d6c723277
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -0,0 +1,67 @@
+//WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes the WebAssembly register classes and some nominal
+/// physical registers.
+///
+//===----------------------------------------------------------------------===//
+
+class WebAssemblyReg<string n> : Register<n> {
+  let Namespace = "WebAssembly";
+}
+
+class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList>
+    : RegisterClass<"WebAssembly", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// Special registers used as the frame and stack pointer.
+//
+// WebAssembly may someday support mixed 32-bit and 64-bit heaps in the same
+// application, which would require separate-width FP and SP.
+def FP32 : WebAssemblyReg<"%FP32">;
+def FP64 : WebAssemblyReg<"%FP64">;
+def SP32 : WebAssemblyReg<"%SP32">;
+def SP64 : WebAssemblyReg<"%SP64">;
+
+// The register allocation framework requires that register classes have at
+// least one register, so we define a few for the integer / floating-point
+// register classes, since we otherwise don't need a physical register in
+// those classes. These are also used as "types" in the generated assembly
+// matcher.
+def I32_0 : WebAssemblyReg<"%i32.0">;
+def I64_0 : WebAssemblyReg<"%i64.0">;
+def F32_0 : WebAssemblyReg<"%f32.0">;
+def F64_0 : WebAssemblyReg<"%f64.0">;
+
+def V128_0: WebAssemblyReg<"%v128">;
+
+def EXNREF_0 : WebAssemblyReg<"%exnref.0">;
+
+// The value stack "register". This is an opaque entity which serves to order
+// uses and defs that must remain in LIFO order.
+def VALUE_STACK : WebAssemblyReg<"STACK">;
+
+// The incoming arguments "register". This is an opaque entity which serves to
+// order the ARGUMENT instructions that are emulating live-in registers and
+// must not be scheduled below other instructions.
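+// (The ARGUMENT_* instruction definitions carry an implicit use of this
+// register to enforce that ordering.)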
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">; + +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// + +def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>; +def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>; +def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; +def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; +def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128, + (add V128_0)>; +def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp new file mode 100644 index 000000000000..5eafd6c54e78 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -0,0 +1,102 @@ +//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a pass that replaces physical registers with +/// virtual registers. +/// +/// LLVM expects certain physical registers, such as a stack pointer. However, +/// WebAssembly doesn't actually have such physical registers. This pass is run +/// once LLVM no longer needs these registers, and replaces them with virtual +/// registers, so they can participate in register stackifying and coloring in +/// the normal way. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-replace-phys-regs" + +namespace { +class WebAssemblyReplacePhysRegs final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {} + +private: + StringRef getPassName() const override { + return "WebAssembly Replace Physical Registers"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyReplacePhysRegs::ID = 0; +INITIALIZE_PASS(WebAssemblyReplacePhysRegs, DEBUG_TYPE, + "Replace physical registers with virtual registers", false, + false) + +FunctionPass *llvm::createWebAssemblyReplacePhysRegs() { + return new WebAssemblyReplacePhysRegs(); +} + +bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Replace Physical Registers **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + bool Changed = false; + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + 
"LiveIntervals shouldn't be active yet!"); + // We don't preserve SSA or liveness. + MRI.leaveSSA(); + MRI.invalidateLiveness(); + + for (unsigned PReg = WebAssembly::NoRegister + 1; + PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { + // Skip fake registers that are never used explicitly. + if (PReg == WebAssembly::VALUE_STACK || PReg == WebAssembly::ARGUMENTS) + continue; + + // Replace explicit uses of the physical register with a virtual register. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg); + unsigned VReg = WebAssembly::NoRegister; + for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) { + MachineOperand &MO = *I++; + if (!MO.isImplicit()) { + if (VReg == WebAssembly::NoRegister) + VReg = MRI.createVirtualRegister(RC); + MO.setReg(VReg); + if (MO.getParent()->isDebugValue()) + MO.setIsDebug(); + Changed = true; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp new file mode 100644 index 000000000000..7b9ae90326f0 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -0,0 +1,900 @@ +// CodeGen/RuntimeLibcallSignatures.cpp - R.T. Lib. Call Signatures -*- C++ -*-- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains signature information for runtime libcalls. +/// +/// CodeGen uses external symbols, which it refers to by name. The WebAssembly +/// target needs type information for all functions. This file contains a big +/// table providing type signatures for all runtime library functions that LLVM +/// uses. +/// +/// This is currently a fairly heavy-handed solution. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyRuntimeLibcallSignatures.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +namespace { + +enum RuntimeLibcallSignature { + func, + f32_func_f32, + f32_func_f64, + f32_func_i32, + f32_func_i64, + f32_func_i16, + f64_func_f32, + f64_func_f64, + f64_func_i32, + f64_func_i64, + i32_func_f32, + i32_func_f64, + i32_func_i32, + i64_func_f32, + i64_func_f64, + i64_func_i64, + f32_func_f32_f32, + f32_func_f32_i32, + f32_func_i64_i64, + f64_func_f64_f64, + f64_func_f64_i32, + f64_func_i64_i64, + i16_func_f32, + i16_func_f64, + i16_func_i64_i64, + i8_func_i8_i8, + func_f32_iPTR_iPTR, + func_f64_iPTR_iPTR, + i16_func_i16_i16, + i32_func_f32_f32, + i32_func_f64_f64, + i32_func_i32_i32, + i32_func_i32_i32_iPTR, + i64_func_i64_i64, + i64_func_i64_i64_iPTR, + i64_i64_func_f32, + i64_i64_func_f64, + i16_i16_func_i16_i16, + i32_i32_func_i32_i32, + i64_i64_func_i64_i64, + i64_i64_func_i64_i64_i64_i64, + i64_i64_func_i64_i64_i64_i64_iPTR, + i64_i64_i64_i64_func_i64_i64_i64_i64, + i64_i64_func_i64_i64_i32, + iPTR_func_iPTR_i32_iPTR, + iPTR_func_iPTR_iPTR_iPTR, + f32_func_f32_f32_f32, + f64_func_f64_f64_f64, + func_i64_i64_iPTR_iPTR, + func_iPTR_f32, + func_iPTR_f64, + func_iPTR_i32, + func_iPTR_i64, + func_iPTR_i64_i64, + func_iPTR_i64_i64_i64_i64, + func_iPTR_i64_i64_i64_i64_i64_i64, + i32_func_i64_i64, + i32_func_i64_i64_i64_i64, + iPTR_func_f32, + iPTR_func_f64, + iPTR_func_i64_i64, + unsupported +}; + +struct RuntimeLibcallSignatureTable { + std::vector<RuntimeLibcallSignature> Table; + + // Any newly-added libcalls will be unsupported by default. + RuntimeLibcallSignatureTable() : Table(RTLIB::UNKNOWN_LIBCALL, unsupported) { + // Integer + Table[RTLIB::SHL_I16] = i16_func_i16_i16; + Table[RTLIB::SHL_I32] = i32_func_i32_i32; + Table[RTLIB::SHL_I64] = i64_func_i64_i64; + Table[RTLIB::SHL_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::SRL_I16] = i16_func_i16_i16; + Table[RTLIB::SRL_I32] = i32_func_i32_i32; + Table[RTLIB::SRL_I64] = i64_func_i64_i64; + Table[RTLIB::SRL_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::SRA_I16] = i16_func_i16_i16; + Table[RTLIB::SRA_I32] = i32_func_i32_i32; + Table[RTLIB::SRA_I64] = i64_func_i64_i64; + Table[RTLIB::SRA_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::MUL_I8] = i8_func_i8_i8; + Table[RTLIB::MUL_I16] = i16_func_i16_i16; + Table[RTLIB::MUL_I32] = i32_func_i32_i32; + Table[RTLIB::MUL_I64] = i64_func_i64_i64; + Table[RTLIB::MUL_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::MULO_I32] = i32_func_i32_i32_iPTR; + Table[RTLIB::MULO_I64] = i64_func_i64_i64_iPTR; + Table[RTLIB::MULO_I128] = i64_i64_func_i64_i64_i64_i64_iPTR; + Table[RTLIB::SDIV_I8] = i8_func_i8_i8; + Table[RTLIB::SDIV_I16] = i16_func_i16_i16; + Table[RTLIB::SDIV_I32] = i32_func_i32_i32; + Table[RTLIB::SDIV_I64] = i64_func_i64_i64; + Table[RTLIB::SDIV_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UDIV_I8] = i8_func_i8_i8; + Table[RTLIB::UDIV_I16] = i16_func_i16_i16; + Table[RTLIB::UDIV_I32] = i32_func_i32_i32; + Table[RTLIB::UDIV_I64] = i64_func_i64_i64; + Table[RTLIB::UDIV_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::SREM_I8] = i8_func_i8_i8; + Table[RTLIB::SREM_I16] = i16_func_i16_i16; + Table[RTLIB::SREM_I32] = i32_func_i32_i32; + Table[RTLIB::SREM_I64] = i64_func_i64_i64; + Table[RTLIB::SREM_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UREM_I8] = i8_func_i8_i8; + 
Table[RTLIB::UREM_I16] = i16_func_i16_i16; + Table[RTLIB::UREM_I32] = i32_func_i32_i32; + Table[RTLIB::UREM_I64] = i64_func_i64_i64; + Table[RTLIB::UREM_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::SDIVREM_I8] = i8_func_i8_i8; + Table[RTLIB::SDIVREM_I16] = i16_i16_func_i16_i16; + Table[RTLIB::SDIVREM_I32] = i32_i32_func_i32_i32; + Table[RTLIB::SDIVREM_I64] = i64_func_i64_i64; + Table[RTLIB::SDIVREM_I128] = i64_i64_i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UDIVREM_I8] = i8_func_i8_i8; + Table[RTLIB::UDIVREM_I16] = i16_i16_func_i16_i16; + Table[RTLIB::UDIVREM_I32] = i32_i32_func_i32_i32; + Table[RTLIB::UDIVREM_I64] = i64_i64_func_i64_i64; + Table[RTLIB::UDIVREM_I128] = i64_i64_i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::NEG_I32] = i32_func_i32; + Table[RTLIB::NEG_I64] = i64_func_i64; + + // Floating-point. + // All F80 and PPCF128 routines are unsupported. + Table[RTLIB::ADD_F32] = f32_func_f32_f32; + Table[RTLIB::ADD_F64] = f64_func_f64_f64; + Table[RTLIB::ADD_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::SUB_F32] = f32_func_f32_f32; + Table[RTLIB::SUB_F64] = f64_func_f64_f64; + Table[RTLIB::SUB_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::MUL_F32] = f32_func_f32_f32; + Table[RTLIB::MUL_F64] = f64_func_f64_f64; + Table[RTLIB::MUL_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::DIV_F32] = f32_func_f32_f32; + Table[RTLIB::DIV_F64] = f64_func_f64_f64; + Table[RTLIB::DIV_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::REM_F32] = f32_func_f32_f32; + Table[RTLIB::REM_F64] = f64_func_f64_f64; + Table[RTLIB::REM_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMA_F32] = f32_func_f32_f32_f32; + Table[RTLIB::FMA_F64] = f64_func_f64_f64_f64; + Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64; + Table[RTLIB::POWI_F32] = f32_func_f32_i32; + Table[RTLIB::POWI_F64] = f64_func_f64_i32; + Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::SQRT_F32] = f32_func_f32; + Table[RTLIB::SQRT_F64] = f64_func_f64; + Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG_F32] = f32_func_f32; + Table[RTLIB::LOG_F64] = f64_func_f64; + Table[RTLIB::LOG_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG2_F32] = f32_func_f32; + Table[RTLIB::LOG2_F64] = f64_func_f64; + Table[RTLIB::LOG2_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG10_F32] = f32_func_f32; + Table[RTLIB::LOG10_F64] = f64_func_f64; + Table[RTLIB::LOG10_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP_F32] = f32_func_f32; + Table[RTLIB::EXP_F64] = f64_func_f64; + Table[RTLIB::EXP_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP2_F32] = f32_func_f32; + Table[RTLIB::EXP2_F64] = f64_func_f64; + Table[RTLIB::EXP2_F128] = func_iPTR_i64_i64; + Table[RTLIB::SIN_F32] = f32_func_f32; + Table[RTLIB::SIN_F64] = f64_func_f64; + Table[RTLIB::SIN_F128] = func_iPTR_i64_i64; + Table[RTLIB::COS_F32] = f32_func_f32; + Table[RTLIB::COS_F64] = f64_func_f64; + Table[RTLIB::COS_F128] = func_iPTR_i64_i64; + Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR; + Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR; + Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR; + Table[RTLIB::POW_F32] = f32_func_f32_f32; + Table[RTLIB::POW_F64] = f64_func_f64_f64; + Table[RTLIB::POW_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::CEIL_F32] = f32_func_f32; + Table[RTLIB::CEIL_F64] = f64_func_f64; + Table[RTLIB::CEIL_F128] = func_iPTR_i64_i64; + Table[RTLIB::TRUNC_F32] = f32_func_f32; + Table[RTLIB::TRUNC_F64] = f64_func_f64; + Table[RTLIB::TRUNC_F128] = func_iPTR_i64_i64; + Table[RTLIB::RINT_F32] = f32_func_f32; + Table[RTLIB::RINT_F64] = f64_func_f64; + 
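+    // Note on the F128 entries: the table encodes the 128-bit soft-float ABI
+    // directly. Each f128 argument is split into two i64 halves, and an f128
+    // result is returned indirectly through a leading iPTR (sret) pointer, so
+    // func_iPTR_i64_i64 reads as void(fp128 *ret, i64 lo, i64 hi).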
Table[RTLIB::RINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::NEARBYINT_F32] = f32_func_f32; + Table[RTLIB::NEARBYINT_F64] = f64_func_f64; + Table[RTLIB::NEARBYINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::ROUND_F32] = f32_func_f32; + Table[RTLIB::ROUND_F64] = f64_func_f64; + Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64; + Table[RTLIB::LROUND_F32] = iPTR_func_f32; + Table[RTLIB::LROUND_F64] = iPTR_func_f64; + Table[RTLIB::LROUND_F128] = iPTR_func_i64_i64; + Table[RTLIB::LLROUND_F32] = i64_func_f32; + Table[RTLIB::LLROUND_F64] = i64_func_f64; + Table[RTLIB::LLROUND_F128] = i64_func_i64_i64; + Table[RTLIB::LRINT_F32] = iPTR_func_f32; + Table[RTLIB::LRINT_F64] = iPTR_func_f64; + Table[RTLIB::LRINT_F128] = iPTR_func_i64_i64; + Table[RTLIB::LLRINT_F32] = i64_func_f32; + Table[RTLIB::LLRINT_F64] = i64_func_f64; + Table[RTLIB::LLRINT_F128] = i64_func_i64_i64; + Table[RTLIB::FLOOR_F32] = f32_func_f32; + Table[RTLIB::FLOOR_F64] = f64_func_f64; + Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64; + Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32; + Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64; + Table[RTLIB::COPYSIGN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMIN_F32] = f32_func_f32_f32; + Table[RTLIB::FMIN_F64] = f64_func_f64_f64; + Table[RTLIB::FMIN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMAX_F32] = f32_func_f32_f32; + Table[RTLIB::FMAX_F64] = f64_func_f64_f64; + Table[RTLIB::FMAX_F128] = func_iPTR_i64_i64_i64_i64; + + // Conversion + // All F80 and PPCF128 routines are unsupported. + Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64; + Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32; + Table[RTLIB::FPEXT_F32_F64] = f64_func_f32; + Table[RTLIB::FPEXT_F16_F32] = f32_func_i16; + Table[RTLIB::FPROUND_F32_F16] = i16_func_f32; + Table[RTLIB::FPROUND_F64_F16] = i16_func_f64; + Table[RTLIB::FPROUND_F64_F32] = f32_func_f64; + Table[RTLIB::FPROUND_F128_F16] = i16_func_i64_i64; + Table[RTLIB::FPROUND_F128_F32] = f32_func_i64_i64; + Table[RTLIB::FPROUND_F128_F64] = f64_func_i64_i64; + Table[RTLIB::FPTOSINT_F32_I32] = i32_func_f32; + Table[RTLIB::FPTOSINT_F32_I64] = i64_func_f32; + Table[RTLIB::FPTOSINT_F32_I128] = i64_i64_func_f32; + Table[RTLIB::FPTOSINT_F64_I32] = i32_func_f64; + Table[RTLIB::FPTOSINT_F64_I64] = i64_func_f64; + Table[RTLIB::FPTOSINT_F64_I128] = i64_i64_func_f64; + Table[RTLIB::FPTOSINT_F128_I32] = i32_func_i64_i64; + Table[RTLIB::FPTOSINT_F128_I64] = i64_func_i64_i64; + Table[RTLIB::FPTOSINT_F128_I128] = i64_i64_func_i64_i64; + Table[RTLIB::FPTOUINT_F32_I32] = i32_func_f32; + Table[RTLIB::FPTOUINT_F32_I64] = i64_func_f32; + Table[RTLIB::FPTOUINT_F32_I128] = i64_i64_func_f32; + Table[RTLIB::FPTOUINT_F64_I32] = i32_func_f64; + Table[RTLIB::FPTOUINT_F64_I64] = i64_func_f64; + Table[RTLIB::FPTOUINT_F64_I128] = i64_i64_func_f64; + Table[RTLIB::FPTOUINT_F128_I32] = i32_func_i64_i64; + Table[RTLIB::FPTOUINT_F128_I64] = i64_func_i64_i64; + Table[RTLIB::FPTOUINT_F128_I128] = i64_i64_func_i64_i64; + Table[RTLIB::SINTTOFP_I32_F32] = f32_func_i32; + Table[RTLIB::SINTTOFP_I32_F64] = f64_func_i32; + Table[RTLIB::SINTTOFP_I32_F128] = func_iPTR_i32; + Table[RTLIB::SINTTOFP_I64_F32] = f32_func_i64; + Table[RTLIB::SINTTOFP_I64_F64] = f64_func_i64; + Table[RTLIB::SINTTOFP_I64_F128] = func_iPTR_i64; + Table[RTLIB::SINTTOFP_I128_F32] = f32_func_i64_i64; + Table[RTLIB::SINTTOFP_I128_F64] = f64_func_i64_i64; + Table[RTLIB::SINTTOFP_I128_F128] = func_iPTR_i64_i64; + Table[RTLIB::UINTTOFP_I32_F32] = f32_func_i32; + Table[RTLIB::UINTTOFP_I32_F64] = f64_func_i64; + Table[RTLIB::UINTTOFP_I32_F128] = func_iPTR_i32; 
+    Table[RTLIB::UINTTOFP_I64_F32] = f32_func_i64;
+    Table[RTLIB::UINTTOFP_I64_F64] = f64_func_i64;
+    Table[RTLIB::UINTTOFP_I64_F128] = func_iPTR_i64;
+    Table[RTLIB::UINTTOFP_I128_F32] = f32_func_i64_i64;
+    Table[RTLIB::UINTTOFP_I128_F64] = f64_func_i64_i64;
+    Table[RTLIB::UINTTOFP_I128_F128] = func_iPTR_i64_i64;
+
+    // Comparison
+    // All F80 and PPCF128 routines are unsupported.
+    Table[RTLIB::OEQ_F32] = i32_func_f32_f32;
+    Table[RTLIB::OEQ_F64] = i32_func_f64_f64;
+    Table[RTLIB::OEQ_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::UNE_F32] = i32_func_f32_f32;
+    Table[RTLIB::UNE_F64] = i32_func_f64_f64;
+    Table[RTLIB::UNE_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::OGE_F32] = i32_func_f32_f32;
+    Table[RTLIB::OGE_F64] = i32_func_f64_f64;
+    Table[RTLIB::OGE_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::OLT_F32] = i32_func_f32_f32;
+    Table[RTLIB::OLT_F64] = i32_func_f64_f64;
+    Table[RTLIB::OLT_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::OLE_F32] = i32_func_f32_f32;
+    Table[RTLIB::OLE_F64] = i32_func_f64_f64;
+    Table[RTLIB::OLE_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::OGT_F32] = i32_func_f32_f32;
+    Table[RTLIB::OGT_F64] = i32_func_f64_f64;
+    Table[RTLIB::OGT_F128] = i32_func_i64_i64_i64_i64;
+    Table[RTLIB::UO_F32] = i32_func_f32_f32;
+    Table[RTLIB::UO_F64] = i32_func_f64_f64;
+    Table[RTLIB::UO_F128] = i32_func_i64_i64_i64_i64;
+    // O_FXX has the weird property that it uses the same libcall name as
+    // UO_FXX. This breaks our name-based lookup. Fortunately only the UO
+    // family of libcalls appears to be actually used.
+    Table[RTLIB::O_F32] = unsupported;
+    Table[RTLIB::O_F64] = unsupported;
+    Table[RTLIB::O_F128] = unsupported;
+
+    // Memory
+    Table[RTLIB::MEMCPY] = iPTR_func_iPTR_iPTR_iPTR;
+    Table[RTLIB::MEMSET] = iPTR_func_iPTR_i32_iPTR;
+    Table[RTLIB::MEMMOVE] = iPTR_func_iPTR_iPTR_iPTR;
+
+    // __stack_chk_fail
+    Table[RTLIB::STACKPROTECTOR_CHECK_FAIL] = func;
+
+    // Return address handling
+    Table[RTLIB::RETURN_ADDRESS] = i32_func_i32;
+
+    // Element-wise Atomic memory
+    // TODO: Fix these when we implement atomic support
+    Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = unsupported;
+    Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = unsupported;
+    Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = unsupported;
+    Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = unsupported;
+    Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = unsupported;
+    Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = unsupported;
+    Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = unsupported;
+    Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = unsupported;
+    Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = unsupported;
+    Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = unsupported;
+
+    Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = unsupported;
+    Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = unsupported;
+    Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = unsupported;
+    Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = unsupported;
+    Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = unsupported;
+
+    // Atomic '__sync_*' libcalls.
+ // TODO: Fix these when we implement atomic support + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_16] = unsupported; + + // Atomic '__atomic_*' libcalls. 
+ // TODO: Fix these when we implement atomic support + Table[RTLIB::ATOMIC_LOAD] = unsupported; + Table[RTLIB::ATOMIC_LOAD_1] = unsupported; + Table[RTLIB::ATOMIC_LOAD_2] = unsupported; + Table[RTLIB::ATOMIC_LOAD_4] = unsupported; + Table[RTLIB::ATOMIC_LOAD_8] = unsupported; + Table[RTLIB::ATOMIC_LOAD_16] = unsupported; + + Table[RTLIB::ATOMIC_STORE] = unsupported; + Table[RTLIB::ATOMIC_STORE_1] = unsupported; + Table[RTLIB::ATOMIC_STORE_2] = unsupported; + Table[RTLIB::ATOMIC_STORE_4] = unsupported; + Table[RTLIB::ATOMIC_STORE_8] = unsupported; + Table[RTLIB::ATOMIC_STORE_16] = unsupported; + + Table[RTLIB::ATOMIC_EXCHANGE] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_1] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_2] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_4] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_8] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_16] = unsupported; + + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_ADD_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_SUB_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_AND_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_OR_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_XOR_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_NAND_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_16] = unsupported; + } +}; + +ManagedStatic<RuntimeLibcallSignatureTable> RuntimeLibcallSignatures; + +// Maps libcall names to their RTLIB::Libcall number. 
Builds the map in a +// constructor for use with ManagedStatic +struct StaticLibcallNameMap { + StringMap<RTLIB::Libcall> Map; + StaticLibcallNameMap() { + static const std::pair<const char *, RTLIB::Libcall> NameLibcalls[] = { +#define HANDLE_LIBCALL(code, name) {(const char *)name, RTLIB::code}, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL + }; + for (const auto &NameLibcall : NameLibcalls) { + if (NameLibcall.first != nullptr && + RuntimeLibcallSignatures->Table[NameLibcall.second] != unsupported) { + assert(Map.find(NameLibcall.first) == Map.end() && + "duplicate libcall names in name map"); + Map[NameLibcall.first] = NameLibcall.second; + } + } + // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is + // consistent with the f64 and f128 names. + Map["__extendhfsf2"] = RTLIB::FPEXT_F16_F32; + Map["__truncsfhf2"] = RTLIB::FPROUND_F32_F16; + + Map["emscripten_return_address"] = RTLIB::RETURN_ADDRESS; + } +}; + +} // end anonymous namespace + +void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, + RTLIB::Libcall LC, + SmallVectorImpl<wasm::ValType> &Rets, + SmallVectorImpl<wasm::ValType> &Params) { + assert(Rets.empty()); + assert(Params.empty()); + + wasm::ValType PtrTy = + Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32; + + auto &Table = RuntimeLibcallSignatures->Table; + switch (Table[LC]) { + case func: + break; + case f32_func_f32: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F32); + break; + case f32_func_f64: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F64); + break; + case f32_func_i32: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::I32); + break; + case f32_func_i64: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::I64); + break; + case f32_func_i16: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::I32); + break; + case f64_func_f32: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::F32); + break; + case f64_func_f64: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::F64); + break; + case f64_func_i32: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::I32); + break; + case f64_func_i64: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::I64); + break; + case i32_func_f32: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F32); + break; + case i32_func_f64: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F64); + break; + case i32_func_i32: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case i64_func_f32: + Rets.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::F32); + break; + case i64_func_f64: + Rets.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::F64); + break; + case i64_func_i64: + Rets.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case f32_func_f32_f32: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F32); + break; + case f32_func_f32_i32: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::I32); + break; + case f32_func_i64_i64: + Rets.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case f64_func_f64_f64: + Rets.push_back(wasm::ValType::F64); + 
Params.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::F64); + break; + case f64_func_f64_i32: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::I32); + break; + case f64_func_i64_i64: + Rets.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case i16_func_f32: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F32); + break; + case i16_func_f64: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F64); + break; + case i16_func_i64_i64: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case i8_func_i8_i8: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case func_f32_iPTR_iPTR: + Params.push_back(wasm::ValType::F32); + Params.push_back(PtrTy); + Params.push_back(PtrTy); + break; + case func_f64_iPTR_iPTR: + Params.push_back(wasm::ValType::F64); + Params.push_back(PtrTy); + Params.push_back(PtrTy); + break; + case i16_func_i16_i16: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case i32_func_f32_f32: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F32); + Params.push_back(wasm::ValType::F32); + break; + case i32_func_f64_f64: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::F64); + Params.push_back(wasm::ValType::F64); + break; + case i32_func_i32_i32: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case i32_func_i32_i32_iPTR: + Rets.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + Params.push_back(PtrTy); + break; + case i64_func_i64_i64: + Rets.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case i64_func_i64_i64_iPTR: + Rets.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(PtrTy); + break; + case i64_i64_func_f32: +#if 0 // TODO: Enable this when wasm gets multiple-return-value support. + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); +#else + Params.push_back(PtrTy); +#endif + Params.push_back(wasm::ValType::F32); + break; + case i64_i64_func_f64: +#if 0 // TODO: Enable this when wasm gets multiple-return-value support. + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); +#else + Params.push_back(PtrTy); +#endif + Params.push_back(wasm::ValType::F64); + break; + case i16_i16_func_i16_i16: +#if 0 // TODO: Enable this when wasm gets multiple-return-value support. + Rets.push_back(wasm::ValType::I32); + Rets.push_back(wasm::ValType::I32); +#else + Params.push_back(PtrTy); +#endif + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case i32_i32_func_i32_i32: +#if 0 // TODO: Enable this when wasm gets multiple-return-value support. + Rets.push_back(wasm::ValType::I32); + Rets.push_back(wasm::ValType::I32); +#else + Params.push_back(PtrTy); +#endif + Params.push_back(wasm::ValType::I32); + Params.push_back(wasm::ValType::I32); + break; + case i64_i64_func_i64_i64: +#if 0 // TODO: Enable this when wasm gets multiple-return-value support. 
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+#else
+    Params.push_back(PtrTy);
+#endif
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+#else
+    Params.push_back(PtrTy);
+#endif
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case i64_i64_func_i64_i64_i64_i64_iPTR:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+#else
+    Params.push_back(PtrTy);
+#endif
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(PtrTy);
+    break;
+  case i64_i64_i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+#else
+    Params.push_back(PtrTy);
+#endif
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case i64_i64_func_i64_i64_i32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+    Rets.push_back(wasm::ValType::I64);
+    Rets.push_back(wasm::ValType::I64);
+#else
+    Params.push_back(PtrTy);
+#endif
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I32);
+    break;
+  case iPTR_func_iPTR_i32_iPTR:
+    Rets.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I32);
+    Params.push_back(PtrTy);
+    break;
+  case iPTR_func_iPTR_iPTR_iPTR:
+    Rets.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    break;
+  case f32_func_f32_f32_f32:
+    Rets.push_back(wasm::ValType::F32);
+    Params.push_back(wasm::ValType::F32);
+    Params.push_back(wasm::ValType::F32);
+    Params.push_back(wasm::ValType::F32);
+    break;
+  case f64_func_f64_f64_f64:
+    Rets.push_back(wasm::ValType::F64);
+    Params.push_back(wasm::ValType::F64);
+    Params.push_back(wasm::ValType::F64);
+    Params.push_back(wasm::ValType::F64);
+    break;
+  case func_i64_i64_iPTR_iPTR:
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    break;
+  case func_iPTR_f32:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F32);
+    break;
+  case func_iPTR_f64:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F64);
+    break;
+  case func_iPTR_i32:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I32);
+    break;
+  case func_iPTR_i64:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case func_iPTR_i64_i64:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case func_iPTR_i64_i64_i64_i64:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case func_iPTR_i64_i64_i64_i64_i64_i64:
+    Params.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case i32_func_i64_i64:
+    Rets.push_back(wasm::ValType::I32);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case i32_func_i64_i64_i64_i64:
+    Rets.push_back(wasm::ValType::I32);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case iPTR_func_f32:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F32);
+    break;
+  case iPTR_func_f64:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F64);
+    break;
+  case iPTR_func_i64_i64:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
+  case unsupported:
+    llvm_unreachable("unsupported runtime library signature");
+  }
+}
+
+static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
+// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
+// other than here, just roll its logic into this version.
+void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
+                               const char *Name,
+                               SmallVectorImpl<wasm::ValType> &Rets,
+                               SmallVectorImpl<wasm::ValType> &Params) {
+  auto &Map = LibcallNameMap->Map;
+  auto Val = Map.find(Name);
+#ifndef NDEBUG
+  if (Val == Map.end()) {
+    auto message = std::string("unexpected runtime library name: ") + Name;
+    llvm_unreachable(message.c_str());
+  }
+#endif
+  return getLibcallSignature(Subtarget, Val->second, Rets, Params);
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
new file mode 100644
index 000000000000..6ae8aaaba59c
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -0,0 +1,37 @@
+// CodeGen/RuntimeLibcallSignatures.h - R.T. Lib. Call Signatures -*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides signature information for runtime libcalls.
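+///
+/// A minimal usage sketch (assuming a WebAssemblySubtarget `ST` is in scope;
+/// "memcpy" is one of the names the lookup table actually contains):
+/// \code
+///   SmallVector<wasm::ValType, 4> Rets, Params;
+///   getLibcallSignature(ST, "memcpy", Rets, Params);
+///   // Per the MEMCPY entry: Rets = {iPTR}, Params = {iPTR, iPTR, iPTR}.
+/// \endcode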
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" + +namespace llvm { + +class WebAssemblySubtarget; + +extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget, + RTLIB::Libcall LC, + SmallVectorImpl<wasm::ValType> &Rets, + SmallVectorImpl<wasm::ValType> &Params); + +extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget, + const char *Name, + SmallVectorImpl<wasm::ValType> &Rets, + SmallVectorImpl<wasm::ValType> &Params); + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp new file mode 100644 index 000000000000..890e4b8e4e2a --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -0,0 +1,59 @@ +//===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the WebAssemblySelectionDAGInfo class. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyTargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-selectiondag-info" + +WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor + +SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + if (!DAG.getMachineFunction() + .getSubtarget<WebAssemblySubtarget>() + .hasBulkMemory()) + return SDValue(); + + SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, + {Chain, MemIdx, MemIdx, Dst, Src, + DAG.getZExtOrTrunc(Size, DL, MVT::i32)}); +} + +SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align, + IsVolatile, false, DstPtrInfo, + SrcPtrInfo); +} + +SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + if (!DAG.getMachineFunction() + .getSubtarget<WebAssemblySubtarget>() + .hasBulkMemory()) + return SDValue(); + + SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + // Only low byte matters for val argument, so anyext the i8 + return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx, + Dst, DAG.getAnyExtOrTrunc(Val, DL, MVT::i32), + DAG.getZExtOrTrunc(Size, DL, MVT::i32)); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h 
b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h new file mode 100644 index 000000000000..0b90ece27dff --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -0,0 +1,44 @@ +//=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the WebAssembly subclass for +/// SelectionDAGTargetInfo. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo { +public: + ~WebAssemblySelectionDAGInfo() override; + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp new file mode 100644 index 000000000000..a249ccf17638 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -0,0 +1,97 @@ +//=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file sets the p2align operands on load and store instructions. 
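+///
+/// The p2align immediate is the log2 of the access's alignment: an i32.load
+/// from a 4-byte-aligned address gets p2align 2 (its natural alignment), a
+/// byte-aligned access gets p2align 0, and anything above the natural
+/// alignment is clamped back down, since wasm does not accept "supernatural"
+/// alignment hints.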
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-set-p2align-operands" + +namespace { +class WebAssemblySetP2AlignOperands final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "WebAssembly Set p2align Operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblySetP2AlignOperands::ID = 0; +INITIALIZE_PASS(WebAssemblySetP2AlignOperands, DEBUG_TYPE, + "Set the p2align operands for WebAssembly loads and stores", + false, false) + +FunctionPass *llvm::createWebAssemblySetP2AlignOperands() { + return new WebAssemblySetP2AlignOperands(); +} + +static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo) { + assert(MI.getOperand(OperandNo).getImm() == 0 && + "ISel should set p2align operands to 0"); + assert(MI.hasOneMemOperand() && + "Load and store instructions have exactly one mem operand"); + assert((*MI.memoperands_begin())->getSize() == + (UINT64_C(1) << WebAssembly::GetDefaultP2Align(MI.getOpcode())) && + "Default p2align value should be natural"); + assert(MI.getDesc().OpInfo[OperandNo].OperandType == + WebAssembly::OPERAND_P2ALIGN && + "Load and store instructions should have a p2align operand"); + uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment()); + + // WebAssembly does not currently support supernatural alignment. + P2Align = std::min(P2Align, + uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode()))); + + MI.getOperand(OperandNo).setImm(P2Align); +} + +bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG({ + dbgs() << "********** Set p2align Operands **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + + for (auto &MBB : MF) { + for (auto &MI : MBB) { + int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx( + MI.getOpcode(), WebAssembly::OpName::p2align); + if (P2AlignOpNum != -1) { + rewriteP2Align(MI, P2AlignOpNum); + Changed = true; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp new file mode 100644 index 000000000000..196a74565285 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -0,0 +1,59 @@ +//===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the WebAssembly-specific subclass of +/// TargetSubtarget. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyInstrInfo.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "WebAssemblyGenSubtargetInfo.inc" + +WebAssemblySubtarget & +WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) { + // Determine default and user-specified characteristics + + if (CPUString.empty()) + CPUString = "generic"; + + ParseSubtargetFeatures(CPUString, FS); + return *this; +} + +WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, + const std::string &CPU, + const std::string &FS, + const TargetMachine &TM) + : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), + TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), + TLInfo(TM, *this) {} + +bool WebAssemblySubtarget::enableAtomicExpand() const { + // If atomics are disabled, atomic ops are lowered instead of expanded + return hasAtomics(); +} + +bool WebAssemblySubtarget::enableMachineScheduler() const { + // Disable the MachineScheduler for now. Even with ShouldTrackPressure set and + // enableMachineSchedDefaultSched overridden, it appears to have an overall + // negative effect for the kinds of register optimizations we're doing. + return false; +} + +bool WebAssemblySubtarget::useAA() const { return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h new file mode 100644 index 000000000000..8db2120f9834 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -0,0 +1,115 @@ +//=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares the WebAssembly-specific subclass of +/// TargetSubtarget. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H + +#include "WebAssemblyFrameLowering.h" +#include "WebAssemblyISelLowering.h" +#include "WebAssemblyInstrInfo.h" +#include "WebAssemblySelectionDAGInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_HEADER +#include "WebAssemblyGenSubtargetInfo.inc" + +namespace llvm { + +// Defined in WebAssemblyGenSubtargetInfo.inc. 
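+// TableGen emits one {name, bit} entry per feature ("atomics", "simd128",
+// "bulk-memory", ...); WebAssemblyTargetMachine walks this array to turn a
+// coalesced FeatureBitset back into a "+feat1,+feat2," string and into
+// per-feature module flags.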
+extern const SubtargetFeatureKV + WebAssemblyFeatureKV[WebAssembly::NumSubtargetFeatures]; + +class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { + enum SIMDEnum { + NoSIMD, + SIMD128, + UnimplementedSIMD128, + } SIMDLevel = NoSIMD; + + bool HasAtomics = false; + bool HasNontrappingFPToInt = false; + bool HasSignExt = false; + bool HasExceptionHandling = false; + bool HasBulkMemory = false; + bool HasMultivalue = false; + bool HasMutableGlobals = false; + bool HasTailCall = false; + + /// String name of used CPU. + std::string CPUString; + + /// What processor and OS we're targeting. + Triple TargetTriple; + + WebAssemblyFrameLowering FrameLowering; + WebAssemblyInstrInfo InstrInfo; + WebAssemblySelectionDAGInfo TSInfo; + WebAssemblyTargetLowering TLInfo; + + /// Initializes using CPUString and the passed in feature string so that we + /// can use initializer lists for subtarget initialization. + WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS); + +public: + /// This constructor initializes the data members to match that + /// of the specified triple. + WebAssemblySubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM); + + const WebAssemblySelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const WebAssemblyFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const WebAssemblyTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const WebAssemblyInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + const WebAssemblyRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); + } + const Triple &getTargetTriple() const { return TargetTriple; } + bool enableAtomicExpand() const override; + bool enableIndirectBrExpand() const override { return true; } + bool enableMachineScheduler() const override; + bool useAA() const override; + + // Predicates used by WebAssemblyInstrInfo.td. + bool hasAddr64() const { return TargetTriple.isArch64Bit(); } + bool hasSIMD128() const { return SIMDLevel >= SIMD128; } + bool hasUnimplementedSIMD128() const { + return SIMDLevel >= UnimplementedSIMD128; + } + bool hasAtomics() const { return HasAtomics; } + bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; } + bool hasSignExt() const { return HasSignExt; } + bool hasExceptionHandling() const { return HasExceptionHandling; } + bool hasBulkMemory() const { return HasBulkMemory; } + bool hasMultivalue() const { return HasMultivalue; } + bool hasMutableGlobals() const { return HasMutableGlobals; } + bool hasTailCall() const { return HasTailCall; } + + /// Parses features string setting specified subtarget options. Definition of + /// function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp new file mode 100644 index 000000000000..bdf5fe2620a4 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -0,0 +1,507 @@ +//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the WebAssembly-specific subclass of TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetMachine.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Utils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm"
+
+// Emscripten's asm.js-style exception handling
+static cl::opt<bool> EnableEmException(
+    "enable-emscripten-cxx-exceptions",
+    cl::desc("WebAssembly Emscripten-style exception handling"),
+    cl::init(false));
+
+// Emscripten's asm.js-style setjmp/longjmp handling
+static cl::opt<bool> EnableEmSjLj(
+    "enable-emscripten-sjlj",
+    cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
+    cl::init(false));
+
+extern "C" void LLVMInitializeWebAssemblyTarget() {
+  // Register the target.
+  RegisterTargetMachine<WebAssemblyTargetMachine> X(
+      getTheWebAssemblyTarget32());
+  RegisterTargetMachine<WebAssemblyTargetMachine> Y(
+      getTheWebAssemblyTarget64());
+
+  // Register backend passes
+  auto &PR = *PassRegistry::getPassRegistry();
+  initializeWebAssemblyAddMissingPrototypesPass(PR);
+  initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR);
+  initializeLowerGlobalDtorsPass(PR);
+  initializeFixFunctionBitcastsPass(PR);
+  initializeOptimizeReturnedPass(PR);
+  initializeWebAssemblyArgumentMovePass(PR);
+  initializeWebAssemblySetP2AlignOperandsPass(PR);
+  initializeWebAssemblyReplacePhysRegsPass(PR);
+  initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
+  initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
+  initializeWebAssemblyMemIntrinsicResultsPass(PR);
+  initializeWebAssemblyRegStackifyPass(PR);
+  initializeWebAssemblyRegColoringPass(PR);
+  initializeWebAssemblyFixIrreducibleControlFlowPass(PR);
+  initializeWebAssemblyLateEHPreparePass(PR);
+  initializeWebAssemblyExceptionInfoPass(PR);
+  initializeWebAssemblyCFGSortPass(PR);
+  initializeWebAssemblyCFGStackifyPass(PR);
+  initializeWebAssemblyExplicitLocalsPass(PR);
+  initializeWebAssemblyLowerBrUnlessPass(PR);
+  initializeWebAssemblyRegNumberingPass(PR);
+  initializeWebAssemblyPeepholePass(PR);
+  initializeWebAssemblyCallIndirectFixupPass(PR);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Lowering public interface.
+//===----------------------------------------------------------------------===//
+
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM,
+                                           const Triple &TT) {
+  if (!RM.hasValue()) {
+    // Default to static relocation model. This should always be more optimal
+    // than PIC since the static linker can determine all global addresses and
+    // assume direct function calls.
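+    // (A non-static model would route global and function addresses through
+    // GOT-style imports; only the Emscripten dynamic-linking ABI currently
+    // defines that, as the check below enforces.)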
+    return Reloc::Static;
+  }
+
+  if (!TT.isOSEmscripten()) {
+    // Relocation modes other than static are currently implemented in a way
+    // that only works for Emscripten, so disable them if we aren't targeting
+    // Emscripten.
+    return Reloc::Static;
+  }
+
+  return *RM;
+}
+
+/// Create a WebAssembly architecture model.
+///
+WebAssemblyTargetMachine::WebAssemblyTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Optional<Reloc::Model> RM,
+    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+    : LLVMTargetMachine(T,
+                        TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
+                                         : "e-m:e-p:32:32-i64:64-n32:64-S128",
+                        TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
+                        getEffectiveCodeModel(CM, CodeModel::Large), OL),
+      TLOF(new WebAssemblyTargetObjectFile()) {
+  // WebAssembly type-checks instructions, but a noreturn function with a
+  // return type that doesn't match the context will cause a check failure.
+  // So we lower LLVM 'unreachable' to ISD::TRAP and then lower that to
+  // WebAssembly's 'unreachable' instruction, which is meant for that case.
+  this->Options.TrapUnreachable = true;
+
+  // WebAssembly treats each function as an independent unit. Force
+  // -ffunction-sections, effectively, so that we can emit them independently.
+  this->Options.FunctionSections = true;
+  this->Options.DataSections = true;
+  this->Options.UniqueSectionNames = true;
+
+  initAsmInfo();
+
+  // Note that we don't use setRequiresStructuredCFG(true). It disables
+  // optimizations that we're ok with, and want, such as critical edge
+  // splitting and tail merging.
+}
+
+WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
+                                           std::string FS) const {
+  auto &I = SubtargetMap[CPU + FS];
+  if (!I) {
+    I = std::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+  }
+  return I.get();
+}
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
+  Attribute CPUAttr = F.getFnAttribute("target-cpu");
+  Attribute FSAttr = F.getFnAttribute("target-features");
+
+  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+                        ? CPUAttr.getValueAsString().str()
+                        : TargetCPU;
+  std::string FS = !FSAttr.hasAttribute(Attribute::None)
+                       ? FSAttr.getValueAsString().str()
+                       : TargetFS;
+
+  // This needs to be done before we create a new subtarget since any
+  // creation will depend on the TM and the code generation flags on the
+  // function that reside in TargetOptions.
+  resetTargetOptions(F);
+
+  return getSubtargetImpl(CPU, FS);
+}
+
+namespace {
+
+class CoalesceFeaturesAndStripAtomics final : public ModulePass {
+  // Take the union of all features used in the module and use it for each
+  // function individually, since having multiple feature sets in one module
+  // currently does not make sense for WebAssembly. If atomics are not
+  // enabled, also strip atomic operations and thread local storage.
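+  //
+  // For example, if one function in the module was built with
+  // "target-features"="+atomics" and the rest with none, every function is
+  // rewritten to carry the union, so a single subtarget can describe the
+  // whole module.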
+ static char ID; + WebAssemblyTargetMachine *WasmTM; + +public: + CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM) + : ModulePass(ID), WasmTM(WasmTM) {} + + bool runOnModule(Module &M) override { + FeatureBitset Features = coalesceFeatures(M); + + std::string FeatureStr = getFeatureString(Features); + for (auto &F : M) + replaceFeatures(F, FeatureStr); + + bool StrippedAtomics = false; + bool StrippedTLS = false; + + if (!Features[WebAssembly::FeatureAtomics]) + StrippedAtomics = stripAtomics(M); + + if (!Features[WebAssembly::FeatureBulkMemory]) + StrippedTLS = stripThreadLocals(M); + + if (StrippedAtomics && !StrippedTLS) + stripThreadLocals(M); + else if (StrippedTLS && !StrippedAtomics) + stripAtomics(M); + + recordFeatures(M, Features, StrippedAtomics || StrippedTLS); + + // Conservatively assume we have made some change + return true; + } + +private: + FeatureBitset coalesceFeatures(const Module &M) { + FeatureBitset Features = + WasmTM + ->getSubtargetImpl(WasmTM->getTargetCPU(), + WasmTM->getTargetFeatureString()) + ->getFeatureBits(); + for (auto &F : M) + Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); + return Features; + } + + std::string getFeatureString(const FeatureBitset &Features) { + std::string Ret; + for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { + if (Features[KV.Value]) + Ret += (StringRef("+") + KV.Key + ",").str(); + } + return Ret; + } + + void replaceFeatures(Function &F, const std::string &Features) { + F.removeFnAttr("target-features"); + F.removeFnAttr("target-cpu"); + F.addFnAttr("target-features", Features); + } + + bool stripAtomics(Module &M) { + // Detect whether any atomics will be lowered, since there is no way to tell + // whether the LowerAtomic pass lowers e.g. stores. + bool Stripped = false; + for (auto &F : M) { + for (auto &B : F) { + for (auto &I : B) { + if (I.isAtomic()) { + Stripped = true; + goto done; + } + } + } + } + + done: + if (!Stripped) + return false; + + LowerAtomicPass Lowerer; + FunctionAnalysisManager FAM; + for (auto &F : M) + Lowerer.run(F, FAM); + + return true; + } + + bool stripThreadLocals(Module &M) { + bool Stripped = false; + for (auto &GV : M.globals()) { + if (GV.getThreadLocalMode() != + GlobalValue::ThreadLocalMode::NotThreadLocal) { + Stripped = true; + GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal); + } + } + return Stripped; + } + + void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { + for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { + std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); + if (KV.Value == WebAssembly::FeatureAtomics && Stripped) { + // "atomics" is special: code compiled without atomics may have had its + // atomics lowered to nonatomic operations. In that case, atomics is + // disallowed to prevent unsafe linking with atomics-enabled objects. + assert(!Features[WebAssembly::FeatureAtomics] || + !Features[WebAssembly::FeatureBulkMemory]); + M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, + wasm::WASM_FEATURE_PREFIX_DISALLOWED); + } else if (Features[KV.Value]) { + // Otherwise features are marked Used or not mentioned + M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, + wasm::WASM_FEATURE_PREFIX_USED); + } + } + } +}; +char CoalesceFeaturesAndStripAtomics::ID = 0; + +/// WebAssembly Code Generator Pass Configuration Options. 
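+///
+/// Note that WebAssembly skips register allocation entirely: the config below
+/// returns no register allocator, and virtual registers survive until they
+/// are either stackified or turned into wasm locals by ExplicitLocals.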
+class WebAssemblyPassConfig final : public TargetPassConfig {
+public:
+  WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
+    return getTM<WebAssemblyTargetMachine>();
+  }
+
+  FunctionPass *createTargetRegisterAllocator(bool) override;
+
+  void addIRPasses() override;
+  bool addInstSelector() override;
+  void addPostRegAlloc() override;
+  bool addGCPasses() override { return false; }
+  void addPreEmitPass() override;
+
+  // No reg alloc
+  bool addRegAssignmentFast() override { return false; }
+
+  // No reg alloc
+  bool addRegAssignmentOptimized() override { return false; }
+};
+} // end anonymous namespace
+
+TargetTransformInfo
+WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
+}
+
+TargetPassConfig *
+WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new WebAssemblyPassConfig(*this, PM);
+}
+
+FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
+  return nullptr; // No reg alloc
+}
+
+//===----------------------------------------------------------------------===//
+// The following functions are called from lib/CodeGen/Passes.cpp to modify
+// the CodeGen pass sequence.
+//===----------------------------------------------------------------------===//
+
+void WebAssemblyPassConfig::addIRPasses() {
+  // Runs LowerAtomicPass if necessary
+  addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
+
+  // This is a no-op if atomics are not used in the module
+  addPass(createAtomicExpandPass());
+
+  // Add signatures to prototype-less function declarations
+  addPass(createWebAssemblyAddMissingPrototypes());
+
+  // Lower .llvm.global_dtors into .llvm.global_ctors with __cxa_atexit calls.
+  addPass(createWebAssemblyLowerGlobalDtors());
+
+  // Fix function bitcasts, as WebAssembly requires caller and callee
+  // signatures to match.
+  addPass(createWebAssemblyFixFunctionBitcasts());
+
+  // Optimize "returned" function attributes.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createWebAssemblyOptimizeReturned());
+
+  // If exception handling is not enabled and setjmp/longjmp handling is
+  // enabled, we lower invokes into calls and delete unreachable landingpad
+  // blocks. Lowering invokes when there is no EH support is done in
+  // TargetPassConfig::addPassesToHandleExceptions, but this runs after this
+  // function and SjLj handling expects all invokes to be lowered before.
+  if (!EnableEmException &&
+      TM->Options.ExceptionModel == ExceptionHandling::None) {
+    addPass(createLowerInvokePass());
+    // The lower invoke pass may create unreachable code. Remove it in order
+    // not to process dead blocks in setjmp/longjmp handling.
+    addPass(createUnreachableBlockEliminationPass());
+  }
+
+  // Handle exceptions and setjmp/longjmp if enabled.
+  if (EnableEmException || EnableEmSjLj)
+    addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
+                                                   EnableEmSjLj));
+
+  // Expand indirectbr instructions to switches.
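+  // (Wasm has no arbitrary indirect-branch instruction, so computed gotos
+  // become a switch over destination indices; the subtarget opts in via
+  // enableIndirectBrExpand() above.)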
+  addPass(createIndirectBrExpandPass());
+
+  TargetPassConfig::addIRPasses();
+}
+
+bool WebAssemblyPassConfig::addInstSelector() {
+  (void)TargetPassConfig::addInstSelector();
+  addPass(
+      createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+  // Run the argument-move pass immediately after the ScheduleDAG scheduler
+  // so that we can fix up the ARGUMENT instructions before anything else
+  // sees them in the wrong place.
+  addPass(createWebAssemblyArgumentMove());
+  // Set the p2align operands. This information is present during ISel, but
+  // it's inconvenient to collect there. Collect it now, and update the
+  // immediate operands.
+  addPass(createWebAssemblySetP2AlignOperands());
+  return false;
+}
+
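A side note on the p2align operands mentioned above: wasm memory instructions encode alignment as a log2 immediate, so a naturally aligned 4-byte (i32) access carries p2align 2. A one-line sketch (illustrative, not part of this patch), assuming only llvm/Support/MathExtras.h:

    #include "llvm/Support/MathExtras.h"

    // p2align is the log2 of the access alignment: p2AlignFor(4) == 2.
    unsigned p2AlignFor(unsigned AlignInBytes) {
      return llvm::Log2_32(AlignInBytes);
    }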
+void WebAssemblyPassConfig::addPostRegAlloc() {
+  // TODO: The following CodeGen passes don't currently support code containing
+  // virtual registers. Consider removing their restrictions and re-enabling
+  // them.
+
+  // These functions all require the NoVRegs property.
+  disablePass(&MachineCopyPropagationID);
+  disablePass(&PostRAMachineSinkingID);
+  disablePass(&PostRASchedulerID);
+  disablePass(&FuncletLayoutID);
+  disablePass(&StackMapLivenessID);
+  disablePass(&LiveDebugValuesID);
+  disablePass(&PatchableFunctionID);
+  disablePass(&ShrinkWrapID);
+
+  // This pass hurts code size for wasm because it can generate irreducible
+  // control flow.
+  disablePass(&MachineBlockPlacementID);
+
+  TargetPassConfig::addPostRegAlloc();
+}
+
+void WebAssemblyPassConfig::addPreEmitPass() {
+  TargetPassConfig::addPreEmitPass();
+
+  // Rewrite pseudo call_indirect instructions as real instructions.
+  // This needs to run before register stackification, because we change the
+  // order of the arguments.
+  addPass(createWebAssemblyCallIndirectFixup());
+
+  // Eliminate multiple-entry loops.
+  addPass(createWebAssemblyFixIrreducibleControlFlow());
+
+  // Do various transformations for exception handling.
+  // Every CFG-changing optimization should come before this.
+  addPass(createWebAssemblyLateEHPrepare());
+
+  // Now that we have a prologue and epilogue and all frame indices are
+  // rewritten, eliminate SP and FP. This allows them to be stackified,
+  // colored, and numbered with the rest of the registers.
+  addPass(createWebAssemblyReplacePhysRegs());
+
+  // Preparations and optimizations related to register stackification.
+  if (getOptLevel() != CodeGenOpt::None) {
+    // LiveIntervals isn't commonly run this late. Re-establish preconditions.
+    addPass(createWebAssemblyPrepareForLiveIntervals());
+
+    // Depend on LiveIntervals and perform some optimizations on it.
+    addPass(createWebAssemblyOptimizeLiveIntervals());
+
+    // Prepare memory intrinsic calls for register stackifying.
+    addPass(createWebAssemblyMemIntrinsicResults());
+
+    // Mark registers as representing wasm's value stack. This is a key
+    // code-compression technique in WebAssembly. We run this pass (and
+    // MemIntrinsicResults above) very late, so that it sees as much code as
+    // possible, including code emitted by PEI and expanded by late tail
+    // duplication.
+    addPass(createWebAssemblyRegStackify());
+
+    // Run the register coloring pass to reduce the total number of registers.
+    // This runs after stackification so that it doesn't consider registers
+    // that become stackified.
+    addPass(createWebAssemblyRegColoring());
+  }
+
+  // Sort the blocks of the CFG into topological order, a prerequisite for
+  // BLOCK and LOOP markers.
+  addPass(createWebAssemblyCFGSort());
+
+  // Insert BLOCK and LOOP markers.
+  addPass(createWebAssemblyCFGStackify());
+
+  // Insert explicit local.get and local.set operators.
+  addPass(createWebAssemblyExplicitLocals());
+
+  // Lower br_unless into br_if.
+  addPass(createWebAssemblyLowerBrUnless());
+
+  // Perform the very last peephole optimizations on the code.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createWebAssemblyPeephole());
+
+  // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
+  addPass(createWebAssemblyRegNumbering());
+}
+
+yaml::MachineFunctionInfo *
+WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
+  return new yaml::WebAssemblyFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
+    const MachineFunction &MF) const {
+  const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+  return new yaml::WebAssemblyFunctionInfo(*MFI);
+}
+
+bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
+    const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+    SMDiagnostic &Error, SMRange &SourceRange) const {
+  const auto &YamlMFI =
+      reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
+  MachineFunction &MF = PFS.MF;
+  MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+  return false;
+}
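For context, none of these hooks are called directly; generic codegen reaches createPassConfig() and the sequences above through TargetMachine::addPassesToEmitFile. A minimal driver sketch (not part of this patch), assuming an LLVM tree of roughly this vintage; header and enum names moved in later releases, and error handling is elided:

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/TargetSelect.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"
    #include <memory>

    void emitWasmObject(llvm::Module &M, llvm::raw_pwrite_stream &OS) {
      using namespace llvm;
      InitializeAllTargets();
      InitializeAllTargetMCs();
      InitializeAllAsmPrinters();

      std::string Err;
      const Target *T =
          TargetRegistry::lookupTarget("wasm32-unknown-unknown", Err);

      TargetOptions Opts;
      std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
          "wasm32-unknown-unknown", "generic", "+simd128", Opts, None));

      // addPassesToEmitFile() calls createPassConfig(), which in turn runs
      // the addIRPasses()/addPreEmitPass() sequences defined above.
      legacy::PassManager PM;
      TM->addPassesToEmitFile(PM, OS, nullptr, CGFT_ObjectFile);
      PM.run(M);
    }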
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
new file mode 100644
index 000000000000..850e6b9a9e9e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -0,0 +1,63 @@
+// WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the WebAssembly-specific subclass of
+/// TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+
+#include "WebAssemblySubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class WebAssemblyTargetMachine final : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  mutable StringMap<std::unique_ptr<WebAssemblySubtarget>> SubtargetMap;
+
+public:
+  WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                           StringRef FS, const TargetOptions &Options,
+                           Optional<Reloc::Model> RM,
+                           Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+                           bool JIT);
+
+  ~WebAssemblyTargetMachine() override;
+
+  const WebAssemblySubtarget *getSubtargetImpl(std::string CPU,
+                                               std::string FS) const;
+  const WebAssemblySubtarget *
+  getSubtargetImpl(const Function &F) const override;
+
+  // Pass Pipeline Configuration
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+
+  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+  bool usesPhysRegsForPEI() const override { return false; }
+
+  yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+  yaml::MachineFunctionInfo *
+  convertFuncInfoToYAML(const MachineFunction &MF) const override;
+  bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+                                PerFunctionMIParsingState &PFS,
+                                SMDiagnostic &Error,
+                                SMRange &SourceRange) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
new file mode 100644
index 000000000000..ad57c600db10
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the functions of the WebAssembly-specific subclass
+/// of TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetMachine.h"
+
+using namespace llvm;
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+                                             const TargetMachine &TM) {
+  TargetLoweringObjectFileWasm::Initialize(Ctx, TM);
+  InitializeWasm();
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
new file mode 100644
index 000000000000..f46bb2040a7d
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -0,0 +1,29 @@
+//===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the WebAssembly-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileWasm {
+public:
+  void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
new file mode 100644
index 000000000000..1c53e90daea7
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -0,0 +1,83 @@
+//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the WebAssembly-specific TargetTransformInfo
+/// implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmtti"
+
+TargetTransformInfo::PopcntSupportKind
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  return TargetTransformInfo::PSK_FastHardware;
+}
+
+unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+  unsigned Result = BaseT::getNumberOfRegisters(ClassID);
+
+  // For SIMD, use at least 16 registers, as a rough guess.
+  bool Vector = (ClassID == 1);
+  if (Vector)
+    Result = std::max(Result, 16u);
+
+  return Result;
+}
+
+unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
+  if (Vector && getST()->hasSIMD128())
+    return 128;
+
+  return 64;
+}
+
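A worked instance of the per-lane shift approximation used in getArithmeticInstrCost below may help; the numeric values are illustrative defaults, not measured costs:

    // For a <4 x i32> shl whose shift amount is neither uniform nor a uniform
    // constant, the model charges extract + scalar shl + insert per lane.
    unsigned variableShiftCostExample() {
      unsigned Lanes = 4;         // <4 x i32>
      unsigned ExtractCost = 1;   // TargetTransformInfo::TCC_Basic
      unsigned ScalarShlCost = 1; // assumed scalar shift cost
      unsigned InsertCost = 1;    // TargetTransformInfo::TCC_Basic
      return Lanes * (ExtractCost + ScalarShlCost + InsertCost); // == 12
    }

A shift with a uniform (scalar) count keeps the much cheaper base SIMD cost instead.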
+unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+
+  unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
+      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+
+  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+    switch (Opcode) {
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::Shl:
+      // SIMD128's shifts currently only accept a scalar shift count. For each
+      // element, we'll need to extract, op, insert. The following is a rough
+      // approximation.
+      if (Opd2Info != TTI::OK_UniformValue &&
+          Opd2Info != TTI::OK_UniformConstantValue)
+        Cost = VTy->getNumElements() *
+               (TargetTransformInfo::TCC_Basic +
+                getArithmeticInstrCost(Opcode, VTy->getElementType()) +
+                TargetTransformInfo::TCC_Basic);
+      break;
+    }
+  }
+  return Cost;
+}
+
+unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                                unsigned Index) {
+  unsigned Cost = BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
+
+  // SIMD128's insert/extract currently only take constant indices.
+  if (Index == -1u)
+    return Cost + 25 * TargetTransformInfo::TCC_Expensive;
+
+  return Cost;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
new file mode 100644
index 000000000000..f0ecc73e91de
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -0,0 +1,72 @@
+//==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object
+/// specific to the WebAssembly target machine.
+///
+/// It uses the target's detailed information to provide more precise answers
+/// to certain TTI queries, while letting the target-independent and default
+/// TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include <algorithm>
+
+namespace llvm {
+
+class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
+  typedef BasicTTIImplBase<WebAssemblyTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const WebAssemblySubtarget *ST;
+  const WebAssemblyTargetLowering *TLI;
+
+  const WebAssemblySubtarget *getST() const { return ST; }
+  const WebAssemblyTargetLowering *getTLI() const { return TLI; }
+
+public:
+  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  // TODO: Implement more Scalar TTI for WebAssembly
+
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  unsigned getNumberOfRegisters(unsigned ClassID) const;
+  unsigned getRegisterBitWidth(bool Vector) const;
+  unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
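A hedged usage sketch: clients normally query costs through the generic TargetTransformInfo wrapper produced by getTargetTransformInfo rather than constructing WebAssemblyTTIImpl directly. This assumes an in-tree context with a live WebAssemblyTargetMachine, a Function from a parsed module, and an LLVM of roughly this vintage:

    #include "WebAssemblyTargetMachine.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instruction.h"

    int vectorShlCost(llvm::WebAssemblyTargetMachine &TM, llvm::Function &F,
                      llvm::LLVMContext &Ctx) {
      llvm::TargetTransformInfo TTI = TM.getTargetTransformInfo(F);
      // <4 x i32>; a non-uniform shift operand would trigger the per-lane
      // expansion modeled in getArithmeticInstrCost above.
      llvm::Type *V4I32 =
          llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx), 4);
      return TTI.getArithmeticInstrCost(llvm::Instruction::Shl, V4I32);
    }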
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
new file mode 100644
index 000000000000..a237da8154ab
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -0,0 +1,81 @@
+//===-- WebAssemblyUtilities.cpp - WebAssembly Utility Functions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements several utility functions for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyUtilities.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+using namespace llvm;

+const char *const WebAssembly::ClangCallTerminateFn = "__clang_call_terminate";
+const char *const WebAssembly::CxaBeginCatchFn = "__cxa_begin_catch";
+const char *const WebAssembly::CxaRethrowFn = "__cxa_rethrow";
+const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev";
+const char *const WebAssembly::PersonalityWrapperFn =
+    "_Unwind_Wasm_CallPersonality";
+
+/// Test whether MI is a child of some other node in an expression tree.
+bool WebAssembly::isChild(const MachineInstr &MI,
+                          const WebAssemblyFunctionInfo &MFI) {
+  if (MI.getNumOperands() == 0)
+    return false;
+  const MachineOperand &MO = MI.getOperand(0);
+  if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
+    return false;
+  Register Reg = MO.getReg();
+  return Register::isVirtualRegister(Reg) && MFI.isVRegStackified(Reg);
+}
+
+bool WebAssembly::mayThrow(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case WebAssembly::THROW:
+  case WebAssembly::THROW_S:
+  case WebAssembly::RETHROW:
+  case WebAssembly::RETHROW_S:
+    return true;
+  }
+  if (isCallIndirect(MI.getOpcode()))
+    return true;
+  if (!MI.isCall())
+    return false;
+
+  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
+  assert(MO.isGlobal() || MO.isSymbol());
+
+  if (MO.isSymbol()) {
+    // Some intrinsics are lowered to calls to external symbols, which are then
+    // lowered to calls to library functions. Most libcalls don't throw, but we
+    // only list some of them here for now.
+    // TODO Consider adding 'nounwind' info in TargetLowering::CallLoweringInfo
+    // instead for more accurate info.
+    const char *Name = MO.getSymbolName();
+    if (strcmp(Name, "memcpy") == 0 || strcmp(Name, "memmove") == 0 ||
+        strcmp(Name, "memset") == 0)
+      return false;
+    return true;
+  }
+
+  const auto *F = dyn_cast<Function>(MO.getGlobal());
+  if (!F)
+    return true;
+  if (F->doesNotThrow())
+    return false;
+  // These functions never throw.
+  if (F->getName() == CxaBeginCatchFn || F->getName() == PersonalityWrapperFn ||
+      F->getName() == ClangCallTerminateFn || F->getName() == StdTerminateFn)
+    return false;
+
+  // TODO Can we exclude call instructions that are marked as 'nounwind' in the
+  // original LLVM IR? (Even when the callee may throw)
+  return true;
+}
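A sketch of a typical caller, for orientation: EH-preparation code can use mayThrow to ask whether anything in a block might unwind. blockMayThrow is an invented name, not part of this patch:

    #include "WebAssemblyUtilities.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Returns true if any instruction in the block may unwind, per the
    // conservative classification implemented in mayThrow above.
    static bool blockMayThrow(const llvm::MachineBasicBlock &MBB) {
      for (const llvm::MachineInstr &MI : MBB)
        if (llvm::WebAssembly::mayThrow(MI))
          return true;
      return false;
    }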
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
new file mode 100644
index 000000000000..26cf84de89b9
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -0,0 +1,51 @@
+//===-- WebAssemblyUtilities - WebAssembly Utility Functions ---*- C++ -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific
+/// utility functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+
+class WebAssemblyFunctionInfo;
+
+namespace WebAssembly {
+
+bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
+bool mayThrow(const MachineInstr &MI);
+
+// Exception-related function names
+extern const char *const ClangCallTerminateFn;
+extern const char *const CxaBeginCatchFn;
+extern const char *const CxaRethrowFn;
+extern const char *const StdTerminateFn;
+extern const char *const PersonalityWrapperFn;
+
+/// Return the "bottom" block of an entity, which can be either a MachineLoop
+/// or WebAssemblyException. This differs from MachineLoop::getBottomBlock in
+/// that it works even if the entity is discontiguous.
+template <typename T> MachineBasicBlock *getBottom(const T *Unit) {
+  MachineBasicBlock *Bottom = Unit->getHeader();
+  for (MachineBasicBlock *MBB : Unit->blocks())
+    if (MBB->getNumber() > Bottom->getNumber())
+      Bottom = MBB;
+  return Bottom;
+}
+
+} // end namespace WebAssembly
+
+} // end namespace llvm
+
+#endif
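One detail worth spelling out: getBottom works by block number rather than by region shape, so it finds the last block even when a region's blocks are not laid out contiguously. A fragment (illustrative only), assuming block numbers already reflect the final, CFGSort'ed layout:

    // Given a MachineLoop *L whose blocks are numbered, say, {2, 3, 7}:
    //   MachineBasicBlock *Bottom = WebAssembly::getBottom(L); // block 7
    // MachineLoop::getBottomBlock, by contrast, assumes the loop occupies a
    // contiguous range of blocks.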
diff --git a/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
new file mode 100644
index 000000000000..701b347bcbd7
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -0,0 +1,120 @@
+# Tests which are known to fail from the GCC torture test suite.
+
+# Syntax: Each line has a single test to be marked as a 'known failure' (or
+# 'exclusion'). Known failures are expected to fail, and will cause an error if
+# they pass. (Known failures that do not run at all will not cause an
+# error). The format is
+# <name> <attributes> # comment
+
+# blockaddress without an indirectbr still can't be supported
+20071220-1.c O2 # Relocation against a BB address
+20071220-2.c
+990208-1.c
+label13.C O0
+label13a.C O0
+label3.C
+
+# WebAssembly hasn't implemented (and may never implement) __builtin_return_address
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c
+attr-alias-1.C
+attr-alias-2.C
+attr-ifunc-1.C
+attr-ifunc-2.C
+attr-ifunc-3.C
+attr-ifunc-4.C
+complit12.C
+va-arg-pack-1.C
+va-arg-pack-len-1.C
+builtin-line1.C
+devirt-6.C # bad main signature
+devirt-13.C # bad main signature
+devirt-14.C # bad main signature
+devirt-21.C # bad main signature
+devirt-23.C # bad main signature
+lifetime2.C # violates C++ DR1696
+
+# WASI doesn't have setjmp.h yet
+pr56982.c
+simd-2.C
+
+# WASI doesn't have pthread.h yet
+thread_local3.C
+thread_local3g.C
+thread_local4.C
+thread_local4g.C
+thread_local5.C
+thread_local5g.C
+
+# Untriaged C++ failures
+spec5.C
+addr1.C
+ef_test.C
+member2.C
+new39.C
+new40.C
+nrv8.C
+offsetof9.C
+opaque-1.C
+pr19650.C
+pr37146-1.C
+pr46149.C
+pr59470.C
+rtti2.C
+self1.C
+type-generic-1.C
+vbase8-10.C
+vbase8-21.C
+vbase8-22.C
+vbase8-4.C
+vector1.C