summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp120
1 files changed, 96 insertions, 24 deletions
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d39b345bdf032..2ba570b9ebbbc 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -547,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = 0;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
- Info.vol = !Vol || !Vol->isNullValue();
+ Info.vol = !Vol || !Vol->isZero();
Info.readMem = true;
Info.writeMem = true;
return true;
@@ -713,7 +713,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
}
-bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const {
+bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
+ const SelectionDAG &DAG) const {
if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
@@ -2374,20 +2375,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IID) {
- case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (IID == Intrinsic::amdgcn_cvt_pkrtz) {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
Src0, Src1);
-
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
return;
}
- default:
- break;
- }
+ break;
}
case ISD::SELECT: {
SDLoc SL(N);
@@ -3736,7 +3733,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getValueType();
- bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags Flags = Op->getFlags();
+ bool Unsafe = DAG.getTarget().Options.UnsafeFPMath ||
+ Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal();
if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
return SDValue();
@@ -3771,15 +3770,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
}
}
- const SDNodeFlags Flags = Op->getFlags();
-
- if (Unsafe || Flags.hasAllowReciprocal()) {
+ if (Unsafe) {
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
- SDNodeFlags NewFlags;
- NewFlags.setUnsafeAlgebra(true);
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
return SDValue();
@@ -4622,15 +4617,99 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
return SDValue();
}
+static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
+ if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+ return true;
+
+ return DAG.isKnownNeverNaN(Op);
+}
+
+static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
+ unsigned MaxDepth=5) {
+ // If source is a result of another standard FP operation it is already in
+ // canonical form.
+
+ switch (Op.getOpcode()) {
+ default:
+ break;
+
+ // These will flush denorms if required.
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FSQRT:
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FMA:
+ case ISD::FMAD:
+
+ case ISD::FCANONICALIZE:
+ return true;
+
+ case ISD::FP_ROUND:
+ return Op.getValueType().getScalarType() != MVT::f16 ||
+ ST->hasFP16Denormals();
+
+ case ISD::FP_EXTEND:
+ return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
+ ST->hasFP16Denormals();
+
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ return ST->hasFP16Denormals();
+
+ // It can/will be lowered or combined as a bit operation.
+ // Need to check their input recursively to handle.
+ case ISD::FNEG:
+ case ISD::FABS:
+ return (MaxDepth > 0) &&
+ isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1);
+
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FSINCOS:
+ return Op.getValueType().getScalarType() != MVT::f16;
+
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
+ // For such targets need to check their input recursively.
+ // TODO: on GFX9+ we could return true without checking provided no-nan
+ // mode, since canonicalization is also used to quiet sNaNs.
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+
+ return (MaxDepth > 0) &&
+ isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) &&
+ isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1);
+
+ case ISD::ConstantFP: {
+ auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+ }
+ }
+ return false;
+}
+
// Constant fold canonicalize.
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
- if (!CFP)
+
+ if (!CFP) {
+ SDValue N0 = N->getOperand(0);
+
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
+ isCanonicalized(N0, getSubtarget()))
+ return N0;
+
return SDValue();
+ }
- SelectionDAG &DAG = DCI.DAG;
const APFloat &C = CFP->getValueAPF();
// Flush denormals to 0 if not enabled.
@@ -4723,13 +4802,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine(
return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
-static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
- if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
- return true;
-
- return DAG.isKnownNeverNaN(Op);
-}
-
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Op0,