Diffstat (limited to 'lib/Target/WebAssembly/WebAssemblyISelLowering.cpp')
-rw-r--r-- | lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 556
1 files changed, 392 insertions, 164 deletions
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 003848e34227..4064a983099c 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1,9 +1,8 @@
 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -46,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   setBooleanContents(ZeroOrOneBooleanContent);
   // Except in SIMD vectors
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-  // WebAssembly does not produce floating-point exceptions on normal floating
-  // point operations.
-  setHasFloatingPointExceptions(false);
   // We don't know the microarchitecture here, so just reduce register pressure.
   setSchedulingPreference(Sched::RegPressure);
   // Tell ISel that we have a stack pointer.
@@ -64,10 +60,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
-      addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
-    }
+  }
+  if (Subtarget->hasUnimplementedSIMD128()) {
+    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
   }
   // Compute derived properties from the register classes.
   computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -111,56 +107,62 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     setTruncStoreAction(T, MVT::f16, Expand);
   }

-  // Support saturating add for i8x16 and i16x8
-  if (Subtarget->hasSIMD128())
-    for (auto T : {MVT::v16i8, MVT::v8i16})
-      for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
-        setOperationAction(Op, T, Legal);
-
   // Expand unavailable integer operations.
   for (auto Op :
        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
-    for (auto T : {MVT::i32, MVT::i64}) {
+    for (auto T : {MVT::i32, MVT::i64})
       setOperationAction(Op, T, Expand);
-    }
-    if (Subtarget->hasSIMD128()) {
-      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+    if (Subtarget->hasSIMD128())
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
-      }
-      if (Subtarget->hasUnimplementedSIMD128()) {
-        setOperationAction(Op, MVT::v2i64, Expand);
-      }
-    }
+    if (Subtarget->hasUnimplementedSIMD128())
+      setOperationAction(Op, MVT::v2i64, Expand);
   }

-  // There is no i64x2.mul instruction
-  setOperationAction(ISD::MUL, MVT::v2i64, Expand);
-
-  // We have custom shuffle lowering to expose the shuffle mask
+  // SIMD-specific configuration
   if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+    // Support saturating add for i8x16 and i16x8
+    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+      for (auto T : {MVT::v16i8, MVT::v8i16})
+        setOperationAction(Op, T, Legal);
+
+    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
+    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+    if (Subtarget->hasUnimplementedSIMD128())
+      for (auto T : {MVT::v2i64, MVT::v2f64})
+        setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+
+    // We have custom shuffle lowering to expose the shuffle mask
+    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
-    }
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
-      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
-    }
-  }
+    if (Subtarget->hasUnimplementedSIMD128())
+      for (auto T: {MVT::v2i64, MVT::v2f64})
+        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

-  // Custom lowering since wasm shifts must have a scalar shift amount
-  if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
-      for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+    // Custom lowering since wasm shifts must have a scalar shift amount
+    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Custom);
-    if (Subtarget->hasUnimplementedSIMD128())
-      for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Custom);
-  }
+    }

-  // There are no select instructions for vectors
-  if (Subtarget->hasSIMD128())
+    // Custom lower lane accesses to expand out variable indices
+    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+        setOperationAction(Op, T, Custom);
+      if (Subtarget->hasUnimplementedSIMD128())
+        for (auto T : {MVT::v2i64, MVT::v2f64})
+          setOperationAction(Op, T, Custom);
+    }
+
+    // There is no i64x2.mul instruction
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+    // There are no vector select instructions
     for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
         setOperationAction(Op, T, Expand);
@@ -169,6 +171,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
           setOperationAction(Op, T, Expand);
     }

+    // Expand integer operations supported for scalars but not SIMD
+    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
+                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+        setOperationAction(Op, T, Expand);
+      if (Subtarget->hasUnimplementedSIMD128())
+        setOperationAction(Op, MVT::v2i64, Expand);
+    }
+
+    // Expand float operations supported for scalars but not SIMD
+    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
+                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
+      setOperationAction(Op, MVT::v4f32, Expand);
+      if (Subtarget->hasUnimplementedSIMD128())
+        setOperationAction(Op, MVT::v2f64, Expand);
+    }
+
+    // Expand additional SIMD ops that V8 hasn't implemented yet
+    if (!Subtarget->hasUnimplementedSIMD128()) {
+      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+    }
+  }
+
   // As a special case, these operators use the type to mean the type to
   // sign-extend from.
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -220,25 +247,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     }
   }

-  // Expand additional SIMD ops that V8 hasn't implemented yet
-  if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
-    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
-    setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
-  }
-
-  // Custom lower lane accesses to expand out variable indices
-  if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
-    }
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      for (auto T : {MVT::v2i64, MVT::v2f64}) {
-        setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
-        setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
-      }
-    }
-  }
+  // Don't do anything clever with build_pairs
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

   // Trap lowers to wasm unreachable
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -248,6 +258,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

   setMaxAtomicSizeInBitsSupported(64);
+
+  if (Subtarget->hasBulkMemory()) {
+    // Use memory.copy and friends over multiple loads and stores
+    MaxStoresPerMemcpy = 1;
+    MaxStoresPerMemcpyOptSize = 1;
+    MaxStoresPerMemmove = 1;
+    MaxStoresPerMemmoveOptSize = 1;
+    MaxStoresPerMemset = 1;
+    MaxStoresPerMemsetOptSize = 1;
+  }
+
+  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
+  // consistent with the f64 and f128 names.
+  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+  // Define the emscripten name for return address helper.
+  // TODO: when implementing other WASM backends, make this generic or only do
+  // this on emscripten depending on what they end up doing.
+  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
+
+  // Always convert switches to br_tables unless there is only one case, which
+  // is equivalent to a simple branch. This reduces code size for wasm, and we
+  // defer possible jump table optimizations to the VM.
+  setMinimumJumpTableEntries(2);
 }

 TargetLowering::AtomicExpansionKind
@@ -272,12 +307,6 @@ FastISel *WebAssemblyTargetLowering::createFastISel(
   return WebAssembly::createFastISel(FuncInfo, LibInfo);
 }

-bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
-    const GlobalAddressSDNode * /*GA*/) const {
-  // All offsets can be folded.
-  return true;
-}
-
 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                       EVT VT) const {
   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
@@ -324,11 +353,11 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   auto &Context = BB->getParent()->getFunction().getContext();
   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  const BasicBlock *LLVMBB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
-  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
+  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
+  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

   MachineFunction::iterator It = ++BB->getIterator();
   F->insert(It, FalseMBB);
@@ -336,8 +365,7 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   F->insert(It, DoneMBB);

   // Transfer the remainder of BB and its successor edges to DoneMBB.
-  DoneMBB->splice(DoneMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

   BB->addSuccessor(TrueMBB);
@@ -502,7 +530,8 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 }

 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
-    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
+    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
+    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
   // WebAssembly supports unaligned accesses, though it should be declared
   // with the p2align attribute on loads and stores which do so, and there
   // may be a performance impact. We tell LLVM they're "fast" because
@@ -578,14 +607,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 // Lowering Code
 //===----------------------------------------------------------------------===//

-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
   MachineFunction &MF = DAG.getMachineFunction();
   DAG.getContext()->diagnose(
-      DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
+      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
 }

 // Test whether the given calling convention is supported.
-static bool CallingConvSupported(CallingConv::ID CallConv) {
+static bool callingConvSupported(CallingConv::ID CallConv) {
   // We currently support the language-independent target-independent
   // conventions. We don't yet have a way to annotate calls with properties like
   // "cold", and we don't have any call-clobbered registers, so these are mostly
   // all handled the same.
@@ -608,20 +637,21 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   auto Layout = MF.getDataLayout();

   CallingConv::ID CallConv = CLI.CallConv;
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG,
          "WebAssembly doesn't support language-specific or target-specific "
          "calling conventions yet");
   if (CLI.IsPatchPoint)
     fail(DL, DAG, "WebAssembly doesn't support patch point yet");

-  // WebAssembly doesn't currently support explicit tail calls. If they are
-  // required, fail. Otherwise, just disable them.
-  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
-       MF.getTarget().Options.GuaranteedTailCallOpt) ||
-      (CLI.CS && CLI.CS.isMustTailCall()))
-    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
-  CLI.IsTailCall = false;
+  // Fail if tail calls are required but not enabled
+  if (!Subtarget->hasTailCall()) {
+    if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+         MF.getTarget().Options.GuaranteedTailCallOpt) ||
+        (CLI.CS && CLI.CS.isMustTailCall()))
+      fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+    CLI.IsTailCall = false;
+  }

   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   if (Ins.size() > 1)
@@ -630,9 +660,9 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

   unsigned NumFixedArgs = 0;
-  for (unsigned i = 0; i < Outs.size(); ++i) {
-    const ISD::OutputArg &Out = Outs[i];
-    SDValue &OutVal = OutVals[i];
+  for (unsigned I = 0; I < Outs.size(); ++I) {
+    const ISD::OutputArg &Out = Outs[I];
+    SDValue &OutVal = OutVals[I];
     if (Out.Flags.isNest())
       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
     if (Out.Flags.isInAlloca())
       fail(DL, DAG,
@@ -669,13 +699,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (IsVarArg) {
     // Outgoing non-fixed arguments are placed in a buffer. First
     // compute their offsets and the total amount of buffer space needed.
-    for (SDValue Arg :
-         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
+      const ISD::OutputArg &Out = Outs[I];
+      SDValue &Arg = OutVals[I];
       EVT VT = Arg.getValueType();
       assert(VT != MVT::iPTR && "Legalized args should be concrete");
       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+      unsigned Align = std::max(Out.Flags.getOrigAlign(),
+                                Layout.getABITypeAlignment(Ty));
       unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
-                                             Layout.getABITypeAlignment(Ty));
+                                             Align);
       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                         Offset, VT.getSimpleVT(),
                                         CCValAssign::Full));
@@ -711,6 +744,18 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
     FINode = DAG.getIntPtrConstant(0, DL);
   }

+  if (Callee->getOpcode() == ISD::GlobalAddress) {
+    // If the callee is a GlobalAddress node (quite common, every direct call
+    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
+    // doesn't at MO_GOT which is not needed for direct calls.
+    GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
+    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+                                        getPointerTy(DAG.getDataLayout()),
+                                        GA->getOffset());
+    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
+                         getPointerTy(DAG.getDataLayout()), Callee);
+  }
+
   // Compute the operands for the CALLn node.
   SmallVector<SDValue, 16> Ops;
   Ops.push_back(Chain);
@@ -739,6 +784,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
     // registers.
     InTys.push_back(In.VT);
   }
+
+  if (CLI.IsTailCall) {
+    // ret_calls do not return values to the current frame
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
+  }
+
   InTys.push_back(MVT::Other);
   SDVTList InTyList = DAG.getVTList(InTys);
   SDValue Res =
@@ -768,7 +820,7 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
     SelectionDAG &DAG) const {
   assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

   SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -795,7 +847,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

   MachineFunction &MF = DAG.getMachineFunction();
@@ -842,7 +894,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   // Record the number and types of arguments and results.
   SmallVector<MVT, 4> Params;
   SmallVector<MVT, 4> Results;
-  ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                       DAG.getTarget(), Params, Results);
   for (MVT VT : Results)
     MFI->addResult(VT);
@@ -855,6 +907,21 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   return Chain;
 }

+void WebAssemblyTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  case ISD::SIGN_EXTEND_INREG:
+    // Do not add any results, signifying that N should not be custom lowered
+    // after all. This happens because simd128 turns on custom lowering for
+    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
+    // illegal type.
+    break;
+  default:
+    llvm_unreachable(
+        "ReplaceNodeResults not implemented for this op for WebAssembly!");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Custom lowering hooks.
 //===----------------------------------------------------------------------===//
@@ -882,22 +949,23 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
   case ISD::BRIND:
     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
     return SDValue();
-  case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
-    fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
-    return SDValue();
+  case ISD::RETURNADDR:
+    return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:
     return LowerFRAMEADDR(Op, DAG);
   case ISD::CopyToReg:
     return LowerCopyToReg(Op, DAG);
-  case ISD::INTRINSIC_WO_CHAIN:
-    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
   case ISD::INSERT_VECTOR_ELT:
     return LowerAccessVectorElement(Op, DAG);
   case ISD::INTRINSIC_VOID:
-    return LowerINTRINSIC_VOID(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerIntrinsic(Op, DAG);
   case ISD::SIGN_EXTEND_INREG:
     return LowerSIGN_EXTEND_INREG(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return LowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SHL:
@@ -939,6 +1007,26 @@ SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
   return DAG.getTargetFrameIndex(FI, Op.getValueType());
 }

+SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
+    fail(DL, DAG,
+         "Non-Emscripten WebAssembly hasn't implemented "
+         "__builtin_return_address");
+    return SDValue();
+  }
+
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
+                     {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+      .first;
+}
+
 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
   // Non-zero depths are not supported by WebAssembly currently. Use the
@@ -963,9 +1051,40 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
          "Unexpected target flags on generic GlobalAddressSDNode");
   if (GA->getAddressSpace() != 0)
     fail(DL, DAG, "WebAssembly only expects the 0 address space");
-  return DAG.getNode(
-      WebAssemblyISD::Wrapper, DL, VT,
-      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+
+  unsigned OperandFlags = 0;
+  if (isPositionIndependent()) {
+    const GlobalValue *GV = GA->getGlobal();
+    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      MVT PtrVT = getPointerTy(MF.getDataLayout());
+      const char *BaseName;
+      if (GV->getValueType()->isFunctionTy()) {
+        BaseName = MF.createExternalSymbolName("__table_base");
+        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
+      }
+      else {
+        BaseName = MF.createExternalSymbolName("__memory_base");
+        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
+      }
+      SDValue BaseAddr =
+          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
+
+      SDValue SymAddr = DAG.getNode(
+          WebAssemblyISD::WrapperPIC, DL, VT,
+          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
+                                     OperandFlags));
+
+      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
+    } else {
+      OperandFlags = WebAssemblyII::MO_GOT;
+    }
+  }
+
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
+                                                GA->getOffset(), OperandFlags));
 }

 SDValue
@@ -976,15 +1095,8 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
   EVT VT = Op.getValueType();
   assert(ES->getTargetFlags() == 0 &&
          "Unexpected target flags on generic ExternalSymbolSDNode");
-  // Set the TargetFlags to 0x1 which indicates that this is a "function"
-  // symbol rather than a data symbol. We do this unconditionally even though
-  // we don't know anything about the symbol other than its name, because all
-  // external symbols used in target-independent SelectionDAG code are for
-  // functions.
-  return DAG.getNode(
-      WebAssemblyISD::Wrapper, DL, VT,
-      DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
-                                  WebAssemblyII::MO_SYMBOL_FUNCTION));
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
 }

 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -1038,17 +1150,28 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                       MachinePointerInfo(SV), 0);
 }

-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                   SelectionDAG &DAG) const {
-  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned IntNo;
+  switch (Op.getOpcode()) {
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    break;
+  default:
+    llvm_unreachable("Invalid intrinsic");
+  }
   SDLoc DL(Op);
+
   switch (IntNo) {
   default:
-    return {}; // Don't custom lower most intrinsics.
+    return SDValue(); // Don't custom lower most intrinsics.
   case Intrinsic::wasm_lsda: {
-    MachineFunction &MF = DAG.getMachineFunction();
     EVT VT = Op.getValueType();
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
@@ -1058,43 +1181,24 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                        DAG.getMCSymbol(S, PtrVT));
   }
-  }
-}
-
-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
-                                               SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-  SDLoc DL(Op);
-
-  switch (IntNo) {
-  default:
-    return {}; // Don't custom lower most intrinsics.
   case Intrinsic::wasm_throw: {
+    // We only support C++ exceptions for now
     int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
-    switch (Tag) {
-    case CPP_EXCEPTION: {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
-      const char *SymName = MF.createExternalSymbolName("__cpp_exception");
-      SDValue SymNode =
-          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
-                      DAG.getTargetExternalSymbol(
-                          SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
-      return DAG.getNode(WebAssemblyISD::THROW, DL,
-                         MVT::Other, // outchain type
-                         {
-                             Op.getOperand(0), // inchain
-                             SymNode,          // exception symbol
-                             Op.getOperand(3)  // thrown value
-                         });
-    }
-    default:
+    if (Tag != CPP_EXCEPTION)
       llvm_unreachable("Invalid tag!");
-    }
-    break;
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
+    return DAG.getNode(WebAssemblyISD::THROW, DL,
+                       MVT::Other, // outchain type
+                       {
+                           Op.getOperand(0), // inchain
+                           SymNode,          // exception symbol
+                           Op.getOperand(3)  // thrown value
+                       });
   }
   }
 }
@@ -1102,6 +1206,7 @@ WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 SDValue
 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   // If sign extension operations are disabled, allow sext_inreg only if operand
   // is a vector extract. SIMD does not depend on sign extension operations, but
   // allowing sext_inreg in this context lets us have simple patterns to select
@@ -1109,12 +1214,136 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
   // simpler in this file, but would necessitate large and brittle patterns to
   // undo the expansion and select extract_lane_s instructions.
   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
-  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
-    return Op;
+  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    const SDValue &Extract = Op.getOperand(0);
+    MVT VecT = Extract.getOperand(0).getSimpleValueType();
+    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
+                             ->getVT()
+                             .getSimpleVT();
+    MVT ExtractedVecT =
+        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+    if (ExtractedVecT == VecT)
+      return Op;
+    // Bitcast vector to appropriate type to ensure ISel pattern coverage
+    const SDValue &Index = Extract.getOperand(1);
+    unsigned IndexVal =
+        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
+    unsigned Scale =
+        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+    assert(Scale > 1);
+    SDValue NewIndex =
+        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
+    SDValue NewExtract = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
+                       NewExtract, Op.getOperand(1));
+  }
   // Otherwise expand
   return SDValue();
 }

+SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const EVT VecT = Op.getValueType();
+  const EVT LaneT = Op.getOperand(0).getValueType();
+  const size_t Lanes = Op.getNumOperands();
+  auto IsConstant = [](const SDValue &V) {
+    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
+  };
+
+  // Find the most common operand, which is approximately the best to splat
+  using Entry = std::pair<SDValue, size_t>;
+  SmallVector<Entry, 16> ValueCounts;
+  size_t NumConst = 0, NumDynamic = 0;
+  for (const SDValue &Lane : Op->op_values()) {
+    if (Lane.isUndef()) {
+      continue;
+    } else if (IsConstant(Lane)) {
+      NumConst++;
+    } else {
+      NumDynamic++;
+    }
+    auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
+                                [&Lane](Entry A) { return A.first == Lane; });
+    if (CountIt == ValueCounts.end()) {
+      ValueCounts.emplace_back(Lane, 1);
+    } else {
+      CountIt->second++;
+    }
+  }
+  auto CommonIt =
+      std::max_element(ValueCounts.begin(), ValueCounts.end(),
+                       [](Entry A, Entry B) { return A.second < B.second; });
+  assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
+  SDValue SplatValue = CommonIt->first;
+  size_t NumCommon = CommonIt->second;
+
+  // If v128.const is available, consider using it instead of a splat
+  if (Subtarget->hasUnimplementedSIMD128()) {
+    // {i32,i64,f32,f64}.const opcode, and value
+    const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
+    // SIMD prefix and opcode
+    const size_t SplatBytes = 2;
+    const size_t SplatConstBytes = SplatBytes + ConstBytes;
+    // SIMD prefix, opcode, and lane index
+    const size_t ReplaceBytes = 3;
+    const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
+    // SIMD prefix, v128.const opcode, and 128-bit value
+    const size_t VecConstBytes = 18;
+    // Initial v128.const and a replace_lane for each non-const operand
+    const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
+    // Initial splat and all necessary replace_lanes
+    const size_t SplatInitBytes =
+        IsConstant(SplatValue)
+            // Initial constant splat
+            ? (SplatConstBytes +
+               // Constant replace_lanes
+               (NumConst - NumCommon) * ReplaceConstBytes +
+               // Dynamic replace_lanes
+               (NumDynamic * ReplaceBytes))
+            // Initial dynamic splat
+            : (SplatBytes +
+               // Constant replace_lanes
+               (NumConst * ReplaceConstBytes) +
+               // Dynamic replace_lanes
+               (NumDynamic - NumCommon) * ReplaceBytes);
+    if (ConstInitBytes < SplatInitBytes) {
+      // Create build_vector that will lower to initial v128.const
+      SmallVector<SDValue, 16> ConstLanes;
+      for (const SDValue &Lane : Op->op_values()) {
+        if (IsConstant(Lane)) {
+          ConstLanes.push_back(Lane);
+        } else if (LaneT.isFloatingPoint()) {
+          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
+        } else {
+          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
+        }
+      }
+      SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+      // Add replace_lane instructions for non-const lanes
+      for (size_t I = 0; I < Lanes; ++I) {
+        const SDValue &Lane = Op->getOperand(I);
+        if (!Lane.isUndef() && !IsConstant(Lane))
+          Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+                               DAG.getConstant(I, DL, MVT::i32));
+      }
+      return Result;
+    }
+  }
+  // Use a splat for the initial vector
+  SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+  // Add replace_lane instructions for other values
+  for (size_t I = 0; I < Lanes; ++I) {
+    const SDValue &Lane = Op->getOperand(I);
+    if (Lane != SplatValue)
+      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+                           DAG.getConstant(I, DL, MVT::i32));
+  }
+  return Result;
+}
+
 SDValue
 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -1131,11 +1360,10 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   Ops[OpIdx++] = Op.getOperand(1);

   // Expand mask indices to byte indices and materialize them as operands
-  for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+  for (int M : Mask) {
     for (size_t J = 0; J < LaneBytes; ++J) {
       // Lower undefs (represented by -1 in mask) to zero
-      uint64_t ByteIndex =
-          Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
     }
   }
@@ -1155,7 +1383,7 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
   return SDValue();
 }

-static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
   // 32-bit and 64-bit unrolled shifts will have proper semantics
   if (LaneT.bitsGE(MVT::i32))
@@ -1190,17 +1418,17 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
   // Expand all vector shifts until V8 fixes its implementation
   // TODO: remove this once V8 is fixed
   if (!Subtarget->hasUnimplementedSIMD128())
-    return UnrollVectorShift(Op, DAG);
+    return unrollVectorShift(Op, DAG);

   // Unroll non-splat vector shifts
   BuildVectorSDNode *ShiftVec;
   SDValue SplatVal;
   if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
       !(SplatVal = ShiftVec->getSplatValue()))
-    return UnrollVectorShift(Op, DAG);
+    return unrollVectorShift(Op, DAG);

   // All splats except i64x2 const splats are handled by patterns
-  ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
   if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
     return Op;
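For reference, the encoding-size comparison that the new LowerBUILD_VECTOR performs can be read in isolation. The sketch below is a hypothetical standalone helper (not part of this patch): it mirrors the same arithmetic, weighing a v128.const followed by one replace_lane per non-constant lane against an initial splat followed by the necessary replace_lanes, and prefers the v128.const form only when it encodes strictly smaller.

// Hypothetical helper mirroring the byte-cost heuristic in LowerBUILD_VECTOR.
// Lanes is the vector lane count; NumConst/NumDynamic count constant and
// dynamic (non-constant, non-undef) lanes; NumCommon counts occurrences of
// the chosen splat value, which is constant iff SplatIsConstant.
#include <algorithm>
#include <cstddef>

static bool preferV128Const(size_t Lanes, size_t NumConst, size_t NumDynamic,
                            size_t NumCommon, bool SplatIsConstant) {
  // {i32,i64,f32,f64}.const opcode plus immediate value
  const size_t ConstBytes = 1 + std::max<size_t>(4, 16 / Lanes);
  // SIMD prefix and opcode
  const size_t SplatBytes = 2;
  const size_t SplatConstBytes = SplatBytes + ConstBytes;
  // SIMD prefix, opcode, and lane index
  const size_t ReplaceBytes = 3;
  const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
  // SIMD prefix, v128.const opcode, and 128-bit immediate
  const size_t VecConstBytes = 18;

  // Initial v128.const plus a replace_lane for each dynamic lane
  const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
  // Initial splat plus all necessary replace_lanes
  const size_t SplatInitBytes =
      SplatIsConstant
          ? SplatConstBytes + (NumConst - NumCommon) * ReplaceConstBytes +
                NumDynamic * ReplaceBytes
          : SplatBytes + NumConst * ReplaceConstBytes +
                (NumDynamic - NumCommon) * ReplaceBytes;

  return ConstInitBytes < SplatInitBytes;
}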