diff options
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 164 |
1 files changed, 71 insertions, 93 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 92f71c687c461..f45cc06e0a0a3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -88,11 +87,6 @@ static cl::opt<bool> UsePrecSqrtF32( cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)); -static cl::opt<bool> FtzEnabled( - "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), - cl::init(false)); - int NVPTXTargetLowering::getDivF32Level() const { if (UsePrecDivF32.getNumOccurrences() > 0) { // If nvptx-prec-div32=N is used on the command-line, always honor it @@ -117,18 +111,8 @@ bool NVPTXTargetLowering::usePrecSqrtF32() const { } bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { - // TODO: Get rid of this flag; there can be only one way to do this. - if (FtzEnabled.getNumOccurrences() > 0) { - // If nvptx-f32ftz is used on the command-line, always honor it - return FtzEnabled; - } else { - const Function &F = MF.getFunction(); - // Otherwise, check for an nvptx-f32ftz attribute on the function - if (F.hasFnAttribute("nvptx-f32ftz")) - return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; - else - return false; - } + return MF.getDenormalMode(APFloat::IEEEsingle()).Output == + DenormalMode::PreserveSign; } static bool IsPTXVectorType(MVT VT) { @@ -233,11 +217,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // covered by the vector op. Otherwise, it returns 1. static unsigned CanMergeParamLoadStoresStartingAt( unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, - const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) { - assert(isPowerOf2_32(AccessSize) && "must be a power of 2!"); + const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { // Can't vectorize if param alignment is not sufficient. - if (AccessSize > ParamAlignment) + if (ParamAlignment < AccessSize) return 1; // Can't vectorize if offset is not aligned. if (Offsets[Idx] & (AccessSize - 1)) @@ -297,7 +280,7 @@ enum ParamVectorizationFlags { static SmallVector<ParamVectorizationFlags, 16> VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, const SmallVectorImpl<uint64_t> &Offsets, - unsigned ParamAlignment) { + Align ParamAlignment) { // Set vector size to match ValueVTs and mark all elements as // scalars by default. SmallVector<ParamVectorizationFlags, 16> VectorInfo; @@ -1258,8 +1241,8 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { std::string NVPTXTargetLowering::getPrototype( const DataLayout &DL, Type *retTy, const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, - ImmutableCallSite CS) const { + const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment, + const CallBase &CB) const { auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); @@ -1294,8 +1277,8 @@ std::string NVPTXTargetLowering::getPrototype( O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { - O << ".param .align " << retAlignment << " .b8 _[" - << DL.getTypeAllocSize(retTy) << "]"; + O << ".param .align " << (retAlignment ? retAlignment->value() : 0) + << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -1316,7 +1299,7 @@ std::string NVPTXTargetLowering::getPrototype( if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { unsigned align = 0; - const CallInst *CallI = cast<CallInst>(CS.getInstruction()); + const CallInst *CallI = cast<CallInst>(&CB); // +1 because index 0 is reserved for return type alignment if (!getAlign(*CallI, i + 1, align)) align = DL.getABITypeAlignment(Ty); @@ -1358,9 +1341,9 @@ std::string NVPTXTargetLowering::getPrototype( assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); - unsigned align = Outs[OIdx].Flags.getByValAlign(); + Align align = Outs[OIdx].Flags.getNonZeroByValAlign(); unsigned sz = DL.getTypeAllocSize(ETy); - O << ".param .align " << align << " .b8 "; + O << ".param .align " << align.value() << " .b8 "; O << "_"; O << "[" << sz << "]"; } @@ -1368,31 +1351,29 @@ std::string NVPTXTargetLowering::getPrototype( return O.str(); } -unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, - ImmutableCallSite CS, - Type *Ty, unsigned Idx, - const DataLayout &DL) const { - if (!CS) { +Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, + const CallBase *CB, Type *Ty, + unsigned Idx, + const DataLayout &DL) const { + if (!CB) { // CallSite is zero, fallback to ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } - unsigned Align = 0; - const Value *DirectCallee = CS.getCalledFunction(); + unsigned Alignment = 0; + const Function *DirectCallee = CB->getCalledFunction(); if (!DirectCallee) { // We don't have a direct function symbol, but that may be because of // constant cast instructions in the call. - const Instruction *CalleeI = CS.getInstruction(); - assert(CalleeI && "Call target is not a function or derived value?"); // With bitcast'd call targets, the instruction will be the call - if (isa<CallInst>(CalleeI)) { + if (const auto *CI = dyn_cast<CallInst>(CB)) { // Check if we have call alignment metadata - if (getAlign(*cast<CallInst>(CalleeI), Idx, Align)) - return Align; + if (getAlign(*CI, Idx, Alignment)) + return Align(Alignment); - const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); + const Value *CalleeV = CI->getCalledOperand(); // Ignore any bitcast instructions while (isa<ConstantExpr>(CalleeV)) { const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); @@ -1404,20 +1385,20 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // We have now looked past all of the bitcasts. Do we finally have a // Function? - if (isa<Function>(CalleeV)) - DirectCallee = CalleeV; + if (const auto *CalleeF = dyn_cast<Function>(CalleeV)) + DirectCallee = CalleeF; } } // Check for function alignment information if we found that the // ultimate target is a Function if (DirectCallee) - if (getAlign(*cast<Function>(DirectCallee), Idx, Align)) - return Align; + if (getAlign(*DirectCallee, Idx, Alignment)) + return Align(Alignment); // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1432,7 +1413,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; ArgListTy &Args = CLI.getArgs(); Type *RetTy = CLI.RetTy; - ImmutableCallSite CS = CLI.CS; + const CallBase *CB = CLI.CB; const DataLayout &DL = DAG.getDataLayout(); bool isABI = (STI.getSmVersion() >= 20); @@ -1465,15 +1446,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); - unsigned ArgAlign = - getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); + Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL); unsigned AllocSize = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); bool NeedAlign; // Does argument declaration specify alignment? if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // declare .param .align <align> .b8 .param<n>[<size>]; SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, @@ -1554,8 +1534,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust type of the store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; - unsigned EltAlign = - NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; + MaybeAlign EltAlign; + if (NeedAlign) + EltAlign = commonAlignment(ArgAlign, Offsets[j]); Chain = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, @@ -1585,7 +1566,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); + Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, // so we don't need to worry about natural alignment or not. // See TargetLowering::LowerCallTo(). @@ -1593,18 +1574,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Enforce minumum alignment of 4 to work around ptxas miscompile // for sm_50+. See corresponding alignment adjustment in // emitFunctionParamList() for details. - if (ArgAlign < 4) - ArgAlign = 4; - SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(sz, dl, MVT::i32), InFlag}; + if (ArgAlign < Align(4)) + ArgAlign = Align(4); + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); for (unsigned j = 0, je = VTs.size(); j != je; ++j) { EVT elemtype = VTs[j]; int curOffset = Offsets[j]; - unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); + unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset); auto PtrVT = getPointerTy(DL); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], DAG.getConstant(curOffset, dl, PtrVT)); @@ -1618,10 +1600,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(curOffset, dl, MVT::i32), theVal, InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, elemtype, - MachinePointerInfo(), /* Align */ 0, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, + MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore); InFlag = Chain.getValue(1); } @@ -1629,7 +1610,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); - unsigned retAlignment = 0; + MaybeAlign retAlignment = None; // Handle Result if (Ins.size() > 0) { @@ -1657,12 +1638,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DeclareRetOps); InFlag = Chain.getValue(1); } else { - retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL); + assert(retAlignment && "retAlignment is guaranteed to be set"); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, dl, MVT::i32), - DAG.getConstant(resultsz / 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { + Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), + DAG.getConstant(resultsz / 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps); InFlag = Chain.getValue(1); @@ -1672,7 +1654,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Both indirect calls and libcalls have nullptr Func. In order to distinguish // between them we must rely on the call site value which is valid for // indirect calls but is always null for libcalls. - bool isIndirectCall = !Func && CS; + bool isIndirectCall = !Func && CB; if (isa<ExternalSymbolSDNode>(Callee)) { Function* CalleeFunc = nullptr; @@ -1695,7 +1677,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS); + std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1768,7 +1750,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); - unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL); auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); SmallVector<EVT, 6> LoadVTs; @@ -1784,7 +1766,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool needTruncate = false; EVT TheLoadType = VTs[i]; EVT EltType = Ins[i].VT; - unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]); + Align EltAlign = commonAlignment(RetAlign, Offsets[i]); if (ExtendIntegerRetVal) { TheLoadType = MVT::i32; EltType = MVT::i32; @@ -2320,10 +2302,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast<MemSDNode>(N); const DataLayout &TD = DAG.getDataLayout(); - unsigned Align = MemSD->getAlignment(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { + Align Alignment = MemSD->getAlign(); + Align PrefAlign = + TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext())); + if (Alignment < PrefAlign) { // This store is not sufficiently aligned, so bail out and let this vector // store be scalarized. Note that we may still be able to emit smaller // vector stores. For example, if we are storing a <4 x float> with an @@ -2559,7 +2541,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); assert(VTs.size() > 0 && "Unexpected empty type."); auto VectorInfo = - VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); + VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty)); SDValue Arg = getParamSymbol(DAG, idx, PtrVT); int VecIdx = -1; // Index of the first element of the current vector. @@ -2678,7 +2660,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); auto VectorInfo = VectorizePTXValueVTs( - VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); + VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1)); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -2730,10 +2712,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Adjust type of load/store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; - Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), - StoreOperands, TheStoreType, - MachinePointerInfo(), /* Align */ 1, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, + MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); // Cleanup vector state. StoreOperands.clear(); } @@ -3799,8 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = - MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); return true; } @@ -3819,8 +3799,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = - MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); return true; } @@ -4810,11 +4789,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, LoadSDNode *LD = cast<LoadSDNode>(N); - unsigned Align = LD->getAlignment(); + Align Alignment = LD->getAlign(); auto &TD = DAG.getDataLayout(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { + Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext())); + if (Alignment < PrefAlign) { // This load is not sufficiently aligned, so bail out and let this vector // load be scalarized. Note that we may still be able to emit smaller // vector loads. For example, if we are loading a <4 x float> with an |