diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 120 |
1 files changed, 96 insertions, 24 deletions
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d39b345bdf032..2ba570b9ebbbc 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -547,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = 0; const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4)); - Info.vol = !Vol || !Vol->isNullValue(); + Info.vol = !Vol || !Vol->isZero(); Info.readMem = true; Info.writeMem = true; return true; @@ -713,7 +713,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } -bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { return (MemVT.getSizeInBits() <= 4 * 32); } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { @@ -2374,20 +2375,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IID) { - case Intrinsic::amdgcn_cvt_pkrtz: { + if (IID == Intrinsic::amdgcn_cvt_pkrtz) { SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); SDLoc SL(N); SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32, Src0, Src1); - Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt)); return; } - default: - break; - } + break; } case ISD::SELECT: { SDLoc SL(N); @@ -3736,7 +3733,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); - bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags Flags = Op->getFlags(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || + Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal(); if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) return SDValue(); @@ -3771,15 +3770,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - const SDNodeFlags Flags = Op->getFlags(); - - if (Unsafe || Flags.hasAllowReciprocal()) { + if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) - SDNodeFlags NewFlags; - NewFlags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags); } return SDValue(); @@ -4622,15 +4617,99 @@ SDValue SITargetLowering::performClassCombine(SDNode *N, return SDValue(); } +static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { + if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) + return true; + + return DAG.isKnownNeverNaN(Op); +} + +static bool isCanonicalized(SDValue Op, const SISubtarget *ST, + unsigned MaxDepth=5) { + // If source is a result of another standard FP operation it is already in + // canonical form. + + switch (Op.getOpcode()) { + default: + break; + + // These will flush denorms if required. + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FSQRT: + case ISD::FCEIL: + case ISD::FFLOOR: + case ISD::FMA: + case ISD::FMAD: + + case ISD::FCANONICALIZE: + return true; + + case ISD::FP_ROUND: + return Op.getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP_EXTEND: + return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + return ST->hasFP16Denormals(); + + // It can/will be lowered or combined as a bit operation. + // Need to check their input recursively to handle. + case ISD::FNEG: + case ISD::FABS: + return (MaxDepth > 0) && + isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1); + + case ISD::FSIN: + case ISD::FCOS: + case ISD::FSINCOS: + return Op.getValueType().getScalarType() != MVT::f16; + + // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. + // For such targets need to check their input recursively. + // TODO: on GFX9+ we could return true without checking provided no-nan + // mode, since canonicalization is also used to quiet sNaNs. + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + + return (MaxDepth > 0) && + isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) && + isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1); + + case ISD::ConstantFP: { + auto F = cast<ConstantFPSDNode>(Op)->getValueAPF(); + return !F.isDenormal() && !(F.isNaN() && F.isSignaling()); + } + } + return false; +} + // Constant fold canonicalize. SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); - if (!CFP) + + if (!CFP) { + SDValue N0 = N->getOperand(0); + + bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction()); + + if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) && + isCanonicalized(N0, getSubtarget())) + return N0; + return SDValue(); + } - SelectionDAG &DAG = DCI.DAG; const APFloat &C = CFP->getValueAPF(); // Flush denormals to 0 if not enabled. @@ -4723,13 +4802,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine( return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3); } -static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { - if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) - return true; - - return DAG.isKnownNeverNaN(Op); -} - SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, SDValue Op0, |