Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 164
1 file changed, 71 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 92f71c687c461..f45cc06e0a0a3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -88,11 +87,6 @@ static cl::opt<bool> UsePrecSqrtF32(
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
-static cl::opt<bool> FtzEnabled(
- "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
- cl::init(false));
-
int NVPTXTargetLowering::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-divf32=N is used on the command-line, always honor it
@@ -117,18 +111,8 @@ bool NVPTXTargetLowering::usePrecSqrtF32() const {
}
bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
- // TODO: Get rid of this flag; there can be only one way to do this.
- if (FtzEnabled.getNumOccurrences() > 0) {
- // If nvptx-f32ftz is used on the command-line, always honor it
- return FtzEnabled;
- } else {
- const Function &F = MF.getFunction();
- // Otherwise, check for an nvptx-f32ftz attribute on the function
- if (F.hasFnAttribute("nvptx-f32ftz"))
- return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
- else
- return false;
- }
+ return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
+ DenormalMode::PreserveSign;
}
static bool IsPTXVectorType(MVT VT) {
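
The useF32FTZ change above replaces the target-specific nvptx-f32ftz plumbing with the generic per-function denormal mode. A minimal sketch of the equivalent attribute-level query, assuming the standard "denormal-fp-math-f32" function attribute and the parseDenormalFPAttribute helper from FloatingPointMode.h:

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Sketch: recover the f32 denormal mode that MF.getDenormalMode() reports,
// by parsing the function attribute directly. PTX's ftz flushes subnormals
// to sign-preserving zero, so only PreserveSign output behavior maps to ftz.
static bool wantsF32FTZ(const Function &F) {
  DenormalMode Mode = parseDenormalFPAttribute(
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString());
  return Mode.Output == DenormalMode::PreserveSign;
}
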
@@ -233,11 +217,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// covered by the vector op. Otherwise, it returns 1.
static unsigned CanMergeParamLoadStoresStartingAt(
unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
- const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
- assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");
+ const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
// Can't vectorize if param alignment is not sufficient.
- if (AccessSize > ParamAlignment)
+ if (ParamAlignment < AccessSize)
return 1;
// Can't vectorize if offset is not aligned.
if (Offsets[Idx] & (AccessSize - 1))
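
Switching ParamAlignment to Align moves the power-of-two invariant into the type and allows direct ordering against byte counts. A small standalone check of those semantics, assuming only Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>
using namespace llvm;

int main() {
  Align ParamAlignment(8); // Align can only hold a power of two >= 1.
  uint64_t AccessSize = 16;
  // operator< between Align and uint64_t compares raw byte values, so this
  // is the same test as the old `AccessSize > ParamAlignment`.
  assert(ParamAlignment < AccessSize);
  // The dropped isPowerOf2_32(AccessSize) assert is still relied on
  // implicitly by the `Offsets[Idx] & (AccessSize - 1)` mask check; access
  // sizes here come from PTX vector loads/stores, which are powers of two.
  return 0;
}
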
@@ -297,7 +280,7 @@ enum ParamVectorizationFlags {
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
const SmallVectorImpl<uint64_t> &Offsets,
- unsigned ParamAlignment) {
+ Align ParamAlignment) {
// Set vector size to match ValueVTs and mark all elements as
// scalars by default.
SmallVector<ParamVectorizationFlags, 16> VectorInfo;
@@ -1258,8 +1241,8 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
- ImmutableCallSite CS) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
+ const CallBase &CB) const {
auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
@@ -1294,8 +1277,8 @@ std::string NVPTXTargetLowering::getPrototype(
O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if (retTy->isAggregateType() || retTy->isVectorTy() ||
retTy->isIntegerTy(128)) {
- O << ".param .align " << retAlignment << " .b8 _["
- << DL.getTypeAllocSize(retTy) << "]";
+ O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
+ << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
@@ -1316,7 +1299,7 @@ std::string NVPTXTargetLowering::getPrototype(
if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
unsigned align = 0;
- const CallInst *CallI = cast<CallInst>(CS.getInstruction());
+ const CallInst *CallI = cast<CallInst>(&CB);
// +1 because index 0 is reserved for return type alignment
if (!getAlign(*CallI, i + 1, align))
align = DL.getABITypeAlignment(Ty);
@@ -1358,9 +1341,9 @@ std::string NVPTXTargetLowering::getPrototype(
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
- unsigned align = Outs[OIdx].Flags.getByValAlign();
+ Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
unsigned sz = DL.getTypeAllocSize(ETy);
- O << ".param .align " << align << " .b8 ";
+ O << ".param .align " << align.value() << " .b8 ";
O << "_";
O << "[" << sz << "]";
}
@@ -1368,31 +1351,29 @@ std::string NVPTXTargetLowering::getPrototype(
return O.str();
}
-unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
- ImmutableCallSite CS,
- Type *Ty, unsigned Idx,
- const DataLayout &DL) const {
- if (!CS) {
+Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
+ const CallBase *CB, Type *Ty,
+ unsigned Idx,
+ const DataLayout &DL) const {
+ if (!CB) {
// CallBase is null, fall back to ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
- unsigned Align = 0;
- const Value *DirectCallee = CS.getCalledFunction();
+ unsigned Alignment = 0;
+ const Function *DirectCallee = CB->getCalledFunction();
if (!DirectCallee) {
// We don't have a direct function symbol, but that may be because of
// constant cast instructions in the call.
- const Instruction *CalleeI = CS.getInstruction();
- assert(CalleeI && "Call target is not a function or derived value?");
// With bitcast'd call targets, the instruction will be the call
- if (isa<CallInst>(CalleeI)) {
+ if (const auto *CI = dyn_cast<CallInst>(CB)) {
// Check if we have call alignment metadata
- if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
- return Align;
+ if (getAlign(*CI, Idx, Alignment))
+ return Align(Alignment);
- const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
+ const Value *CalleeV = CI->getCalledOperand();
// Ignore any bitcast instructions
while (isa<ConstantExpr>(CalleeV)) {
const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
@@ -1404,20 +1385,20 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// We have now looked past all of the bitcasts. Do we finally have a
// Function?
- if (isa<Function>(CalleeV))
- DirectCallee = CalleeV;
+ if (const auto *CalleeF = dyn_cast<Function>(CalleeV))
+ DirectCallee = CalleeF;
}
}
// Check for function alignment information if we found that the
// ultimate target is a Function
if (DirectCallee)
- if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
- return Align;
+ if (getAlign(*DirectCallee, Idx, Alignment))
+ return Align(Alignment);
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
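
getArgumentAlignment now walks from a CallBase instead of an ImmutableCallSite. A compact sketch of the same look-through-casts pattern (hypothetical helper name; the peeling mirrors the loop above, which strips ConstantExpr casts via getOperand(0)):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Resolve the ultimate callee of a call, looking through constant bitcasts.
// Returns nullptr for genuinely indirect calls.
static const Function *resolveCallee(const CallBase &CB) {
  if (const Function *F = CB.getCalledFunction())
    return F; // Direct call: nothing to peel.
  const Value *V = CB.getCalledOperand();
  while (const auto *CE = dyn_cast<ConstantExpr>(V))
    V = CE->getOperand(0); // Step through bitcast-style constant exprs.
  return dyn_cast<Function>(V);
}
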
@@ -1432,7 +1413,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
Type *RetTy = CLI.RetTy;
- ImmutableCallSite CS = CLI.CS;
+ const CallBase *CB = CLI.CB;
const DataLayout &DL = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
@@ -1465,15 +1446,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
- unsigned ArgAlign =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL);
unsigned AllocSize = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
bool NeedAlign; // Does argument declaration specify alignment?
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// declare .param .align <align> .b8 .param<n>[<size>];
SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
@@ -1554,8 +1534,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust type of the store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
- unsigned EltAlign =
- NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
+ MaybeAlign EltAlign;
+ if (NeedAlign)
+ EltAlign = commonAlignment(ArgAlign, Offsets[j]);
Chain = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
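
commonAlignment replaces GreatestCommonDivisor64 for computing the alignment known at ArgAlign + Offset. For a power-of-two base alignment the two agree; a quick standalone check, assuming only Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

int main() {
  Align ArgAlign(16);
  // commonAlignment(A, Offset) is the largest alignment guaranteed at
  // A + Offset: the lowest set bit of (A | Offset). For power-of-two A
  // this matches the old GreatestCommonDivisor64(A, Offset).
  assert(commonAlignment(ArgAlign, 0) == Align(16)); // offset 0 keeps A
  assert(commonAlignment(ArgAlign, 4) == Align(4));
  assert(commonAlignment(ArgAlign, 12) == Align(4)); // gcd(16, 12) == 4
  return 0;
}
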
@@ -1585,7 +1566,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
+ Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
@@ -1593,18 +1574,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Enforce minimum alignment of 4 to work around ptxas miscompile
// for sm_50+. See corresponding alignment adjustment in
// emitFunctionParamList() for details.
- if (ArgAlign < 4)
- ArgAlign = 4;
- SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag};
+ if (ArgAlign < Align(4))
+ ArgAlign = Align(4);
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
EVT elemtype = VTs[j];
int curOffset = Offsets[j];
- unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset);
auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
DAG.getConstant(curOffset, dl, PtrVT));
@@ -1618,10 +1600,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(curOffset, dl, MVT::i32),
theVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, elemtype,
- MachinePointerInfo(), /* Align */ 0,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype,
+ MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore);
InFlag = Chain.getValue(1);
}
@@ -1629,7 +1610,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
- unsigned retAlignment = 0;
+ MaybeAlign retAlignment = None;
// Handle Result
if (Ins.size() > 0) {
@@ -1657,12 +1638,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
+ assert(retAlignment && "retAlignment is guaranteed to be set");
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, dl, MVT::i32),
- DAG.getConstant(resultsz / 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = {
+ Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
+ DAG.getConstant(resultsz / 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
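
retAlignment is now a MaybeAlign, so "alignment unknown" is an explicit None rather than the old sentinel value 0. A minimal illustration of the type's behavior, assuming Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

int main() {
  MaybeAlign RetAlignment = None; // no alignment known yet
  assert(!RetAlignment);          // converts to false while unset
  RetAlignment = Align(8);
  assert(RetAlignment && RetAlignment->value() == 8); // set and queryable
  return 0;
}
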
@@ -1672,7 +1654,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Both indirect calls and libcalls have nullptr Func. In order to distinguish
// between them we must rely on the call site value which is valid for
// indirect calls but is always null for libcalls.
- bool isIndirectCall = !Func && CS;
+ bool isIndirectCall = !Func && CB;
if (isa<ExternalSymbolSDNode>(Callee)) {
Function* CalleeFunc = nullptr;
@@ -1695,7 +1677,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
+ std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1768,7 +1750,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
SmallVector<EVT, 6> LoadVTs;
@@ -1784,7 +1766,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool needTruncate = false;
EVT TheLoadType = VTs[i];
EVT EltType = Ins[i].VT;
- unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
+ Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
if (ExtendIntegerRetVal) {
TheLoadType = MVT::i32;
EltType = MVT::i32;
@@ -2320,10 +2302,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
const DataLayout &TD = DAG.getDataLayout();
- unsigned Align = MemSD->getAlignment();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align Alignment = MemSD->getAlign();
+ Align PrefAlign =
+ TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
// vector stores. For example, if we are storing a <4 x float> with an
@@ -2559,7 +2541,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
assert(VTs.size() > 0 && "Unexpected empty type.");
auto VectorInfo =
- VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
+ VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
int VecIdx = -1; // Index of the first element of the current vector.
@@ -2678,7 +2660,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
auto VectorInfo = VectorizePTXValueVTs(
- VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
+ VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1));
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -2730,10 +2712,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Adjust type of load/store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
- Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
- StoreOperands, TheStoreType,
- MachinePointerInfo(), /* Align */ 1,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
+ MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
// Cleanup vector state.
StoreOperands.clear();
}
@@ -3799,8 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
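
getMaybeAlignValue() folds the old MaybeAlign(CI->getZExtValue()) pattern into ConstantInt, with a zero operand mapping to None. A small standalone check, assuming the headers below:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  auto *C0 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
  auto *C8 = ConstantInt::get(Type::getInt32Ty(Ctx), 8);
  assert(!C0->getMaybeAlignValue());             // 0 means "no alignment"
  assert(*C8->getMaybeAlignValue() == Align(8)); // nonzero wraps into Align
  return 0;
}
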
@@ -3819,8 +3799,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
@@ -4810,11 +4789,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LD = cast<LoadSDNode>(N);
- unsigned Align = LD->getAlignment();
+ Align Alignment = LD->getAlign();
auto &TD = DAG.getDataLayout();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
// vector loads. For example, if we are loading a <4 x float> with an