Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZISelLowering.cpp')
 -rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 379
 1 file changed, 302 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e0ca9da93561..c73905d3357a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -19,8 +19,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/KnownBits.h"
 #include <cctype>
@@ -32,12 +33,16 @@ using namespace llvm;
 namespace {
 // Represents information about a comparison.
 struct Comparison {
-  Comparison(SDValue Op0In, SDValue Op1In)
-    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
+    : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
+      Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
 
   // The operands to the comparison.
   SDValue Op0, Op1;
 
+  // Chain if this is a strict floating-point comparison.
+  SDValue Chain;
+
   // The opcode that should be used to compare Op0 and Op1.
   unsigned Opcode;
 
@@ -132,6 +137,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     if (isTypeLegal(VT)) {
       // Lower SET_CC into an IPM-based sequence.
       setOperationAction(ISD::SETCC, VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
 
       // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
       setOperationAction(ISD::SELECT, VT, Expand);
@@ -212,6 +219,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
       if (Subtarget.hasFPExtension())
         setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
+
+      // And similarly for STRICT_[SU]INT_TO_FP.
+      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
+      if (Subtarget.hasFPExtension())
+        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
     }
   }
 
@@ -251,6 +263,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   if (!Subtarget.hasFPExtension()) {
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
   }
 
   // We have native support for a 64-bit CTLZ, via FLOGR.
@@ -373,6 +387,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
         // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
         // and inverting the result as necessary.
         setOperationAction(ISD::SETCC, VT, Custom);
+        setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+        if (Subtarget.hasVectorEnhancements1())
+          setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
       }
     }
   }
@@ -392,6 +409,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
   }
 
   if (Subtarget.hasVectorEnhancements2()) {
@@ -408,6 +429,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
   }
 
   // Handle floating-point types.
@@ -558,16 +583,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                     MVT::v4f32, MVT::v2f64 }) {
       setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
       setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
     }
   }
 
-  // We have fused multiply-addition for f32 and f64 but not f128.
-  setOperationAction(ISD::FMA, MVT::f32, Legal);
-  setOperationAction(ISD::FMA, MVT::f64, Legal);
-  if (Subtarget.hasVectorEnhancements1())
-    setOperationAction(ISD::FMA, MVT::f128, Legal);
-  else
+  // We only have fused f128 multiply-addition on vector registers.
+  if (!Subtarget.hasVectorEnhancements1()) {
     setOperationAction(ISD::FMA, MVT::f128, Expand);
+    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
+  }
 
   // We don't have a copysign instruction on vector registers.
   if (Subtarget.hasVectorEnhancements1())
@@ -612,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::STRICT_FP_ROUND);
   setTargetDAGCombine(ISD::FP_EXTEND);
+  setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
   setTargetDAGCombine(ISD::BSWAP);
   setTargetDAGCombine(ISD::SDIV);
   setTargetDAGCombine(ISD::UDIV);
@@ -634,6 +661,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // than "STC;MVC". Handle the choice in target-specific code instead.
   MaxStoresPerMemset = 0;
   MaxStoresPerMemsetOptSize = 0;
+
+  // Default to having -disable-strictnode-mutation on
+  IsStrictFPEnabled = true;
 }
 
 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -643,7 +673,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
   return VT.changeVectorElementTypeToInteger();
 }
 
-bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
@@ -1406,7 +1437,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
 
     // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
-    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+    int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
     unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
     FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
 
@@ -1675,6 +1706,9 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   if (RetLocs.empty())
     return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
 
+  if (CallConv == CallingConv::GHC)
+    report_fatal_error("GHC functions return void only");
+
   // Copy the result values into the output registers.
   SDValue Glue;
   SmallVector<SDValue, 4> RetOps;
@@ -2161,6 +2195,10 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
 // negation to set CC, so avoiding separate LOAD AND TEST and
 // LOAD (NEGATIVE/COMPLEMENT) instructions.
 static void adjustForFNeg(Comparison &C) {
+  // This optimization is invalid for strict comparisons, since FNEG
+  // does not raise any exceptions.
+  if (C.Chain)
+    return;
   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
   if (C1 && C1->isZero()) {
     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -2448,7 +2486,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                   SDValue Call, unsigned CCValid, uint64_t CC,
                                   ISD::CondCode Cond) {
-  Comparison C(Call, SDValue());
+  Comparison C(Call, SDValue(), SDValue());
   C.Opcode = Opcode;
   C.CCValid = CCValid;
   if (Cond == ISD::SETEQ)
@@ -2479,8 +2517,11 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
 
 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
-                         ISD::CondCode Cond, const SDLoc &DL) {
+                         ISD::CondCode Cond, const SDLoc &DL,
+                         SDValue Chain = SDValue(),
+                         bool IsSignaling = false) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
+    assert(!Chain);
     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
     unsigned Opcode, CCValid;
     if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -2492,13 +2533,19 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
         isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
   }
-  Comparison C(CmpOp0, CmpOp1);
+  Comparison C(CmpOp0, CmpOp1, Chain);
   C.CCMask = CCMaskForCondCode(Cond);
   if (C.Op0.getValueType().isFloatingPoint()) {
     C.CCValid = SystemZ::CCMASK_FCMP;
-    C.Opcode = SystemZISD::FCMP;
+    if (!C.Chain)
+      C.Opcode = SystemZISD::FCMP;
+    else if (!IsSignaling)
+      C.Opcode = SystemZISD::STRICT_FCMP;
+    else
+      C.Opcode = SystemZISD::STRICT_FCMPS;
     adjustForFNeg(C);
   } else {
+    assert(!C.Chain);
     C.CCValid = SystemZ::CCMASK_ICMP;
     C.Opcode = SystemZISD::ICMP;
     // Choose the type of comparison. Equality and inequality tests can
@@ -2556,6 +2603,10 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
     return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                        DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
   }
+  if (C.Chain) {
+    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+    return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
+  }
   return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
 }
 
@@ -2600,24 +2651,51 @@ static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
 }
 
 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
-// be done directly. IsFP is true if CC is for a floating-point rather than
-// integer comparison.
-static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
+// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
+// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
+// floating-point comparisons.
+enum class CmpMode { Int, FP, StrictFP, SignalingFP };
+static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
   switch (CC) {
   case ISD::SETOEQ:
   case ISD::SETEQ:
-    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPE;
+    case CmpMode::FP: return SystemZISD::VFCMPE;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETOGE:
   case ISD::SETGE:
-    return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+    switch (Mode) {
+    case CmpMode::Int: return 0;
+    case CmpMode::FP: return SystemZISD::VFCMPHE;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETOGT:
   case ISD::SETGT:
-    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPH;
+    case CmpMode::FP: return SystemZISD::VFCMPH;
+    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
+    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
+    }
+    llvm_unreachable("Bad mode");
 
   case ISD::SETUGT:
-    return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+    switch (Mode) {
+    case CmpMode::Int: return SystemZISD::VICMPHL;
+    case CmpMode::FP: return 0;
+    case CmpMode::StrictFP: return 0;
+    case CmpMode::SignalingFP: return 0;
+    }
+    llvm_unreachable("Bad mode");
 
   default:
     return 0;
@@ -2626,17 +2704,16 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
 
 // Return the SystemZISD vector comparison operation for CC or its inverse,
 // or 0 if neither can be done directly. Indicate in Invert whether the
-// result is for the inverse of CC. IsFP is true if CC is for a
-// floating-point rather than integer comparison.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+// result is for the inverse of CC. Mode is as above.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
                                             bool &Invert) {
-  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
     Invert = false;
     return Opcode;
   }
 
-  CC = ISD::getSetCCInverse(CC, !IsFP);
-  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
+  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
     Invert = true;
     return Opcode;
  }
@@ -2645,44 +2722,73 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
 
 // Return a v2f64 that contains the extended form of elements Start and Start+1
-// of v4f32 value Op.
+// of v4f32 value Op. If Chain is nonnull, return the strict form.
 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
-                                  SDValue Op) {
+                                  SDValue Op, SDValue Chain) {
   int Mask[] = { Start, -1, Start + 1, -1 };
   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32),
                             Mask);
+  if (Chain) {
+    SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
+    return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
+  }
   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
 }
 
 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
-// producing a result of type VT.
+// producing a result of type VT. If Chain is nonnull, return the strict form.
 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                             const SDLoc &DL, EVT VT,
                                             SDValue CmpOp0,
-                                            SDValue CmpOp1) const {
+                                            SDValue CmpOp1,
+                                            SDValue Chain) const {
   // There is no hardware support for v4f32 (unless we have the vector
   // enhancements facility 1), so extend the vector into two v2f64s
   // and compare those.
   if (CmpOp0.getValueType() == MVT::v4f32 &&
       !Subtarget.hasVectorEnhancements1()) {
-    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
-    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
-    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
-    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
+    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
+    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
+    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
+    if (Chain) {
+      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
+      SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
+      SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
+      SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+      SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
+                            H1.getValue(1), L1.getValue(1),
+                            HRes.getValue(1), LRes.getValue(1) };
+      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+      SDValue Ops[2] = { Res, NewChain };
+      return DAG.getMergeValues(Ops, DL);
+    }
     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
   }
+  if (Chain) {
+    SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+    return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
+  }
   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
 }
 
 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
-// an integer mask of type VT.
+// an integer mask of type VT. If Chain is nonnull, we have a strict
+// floating-point comparison. If in addition IsSignaling is true, we have
+// a strict signaling floating-point comparison.
 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                 const SDLoc &DL, EVT VT,
                                                 ISD::CondCode CC,
                                                 SDValue CmpOp0,
-                                                SDValue CmpOp1) const {
+                                                SDValue CmpOp1,
+                                                SDValue Chain,
+                                                bool IsSignaling) const {
   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+  assert (!Chain || IsFP);
+  assert (!IsSignaling || Chain);
+  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
+                 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
   bool Invert = false;
   SDValue Cmp;
   switch (CC) {
@@ -2692,9 +2798,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     LLVM_FALLTHROUGH;
   case ISD::SETO: {
     assert(IsFP && "Unexpected integer comparison");
-    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp1, CmpOp0, Chain);
+    SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
+                              DL, VT, CmpOp0, CmpOp1, Chain);
     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+    if (Chain)
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                          LT.getValue(1), GE.getValue(1));
     break;
   }
 
@@ -2704,9 +2815,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     LLVM_FALLTHROUGH;
   case ISD::SETONE: {
     assert(IsFP && "Unexpected integer comparison");
-    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp1, CmpOp0, Chain);
+    SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+                              DL, VT, CmpOp0, CmpOp1, Chain);
     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+    if (Chain)
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                          LT.getValue(1), GT.getValue(1));
     break;
   }
 
@@ -2714,15 +2830,17 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     // matter whether we try the inversion or the swap first, since
     // there are no cases where both work.
   default:
-    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+    if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
     else {
       CC = ISD::getSetCCSwappedOperands(CC);
-      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+      if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
      else
        llvm_unreachable("Unhandled comparison");
    }
+    if (Chain)
+      Chain = Cmp.getValue(1);
    break;
  }
  if (Invert) {
@@ -2730,6 +2848,10 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
       DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
   }
+  if (Chain && Chain.getNode() != Cmp.getNode()) {
+    SDValue Ops[2] = { Cmp, Chain };
+    Cmp = DAG.getMergeValues(Ops, DL);
+  }
   return Cmp;
 }
 
@@ -2748,6 +2870,29 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
   return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
 }
 
+SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
+                                                  SelectionDAG &DAG,
+                                                  bool IsSignaling) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue CmpOp0 = Op.getOperand(1);
+  SDValue CmpOp1 = Op.getOperand(2);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
+  SDLoc DL(Op);
+  EVT VT = Op.getNode()->getValueType(0);
+  if (VT.isVector()) {
+    SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
+                                   Chain, IsSignaling);
+    return Res.getValue(Op.getResNo());
+  }
+
+  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
+  SDValue CCReg = emitCmp(DAG, DL, C);
+  CCReg->setFlags(Op->getFlags());
+  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
+  SDValue Ops[2] = { Result, CCReg.getValue(1) };
+  return DAG.getMergeValues(Ops, DL);
+}
+
 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   SDValue CmpOp0 = Op.getOperand(2);
@@ -2828,17 +2973,26 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
 
   SDValue Result;
   if (Subtarget.isPC32DBLSymbol(GV, CM)) {
-    // Assign anchors at 1<<12 byte boundaries.
-    uint64_t Anchor = Offset & ~uint64_t(0xfff);
-    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
-    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
-
-    // The offset can be folded into the address if it is aligned to a halfword.
-    Offset -= Anchor;
-    if (Offset != 0 && (Offset & 1) == 0) {
-      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
-      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
-      Offset = 0;
+    if (isInt<32>(Offset)) {
+      // Assign anchors at 1<<12 byte boundaries.
+      uint64_t Anchor = Offset & ~uint64_t(0xfff);
+      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+      // The offset can be folded into the address if it is aligned to a
+      // halfword.
+      Offset -= Anchor;
+      if (Offset != 0 && (Offset & 1) == 0) {
+        SDValue Full =
+            DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+        Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
+        Offset = 0;
+      }
+    } else {
+      // Conservatively load a constant offset greater than 32 bits into a
+      // register below.
+      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
+      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    }
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
@@ -2865,6 +3019,10 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
   SDValue Chain = DAG.getEntryNode();
   SDValue Glue;
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
@@ -2931,6 +3089,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   SDValue TP = lowerThreadPointer(DL, DAG);
 
   // Get the offset of GA from the thread pointer, based on the TLS model.
@@ -3060,14 +3222,10 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
-  // If the back chain frame index has not been allocated yet, do so.
-  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
-  int BackChainIdx = FI->getFramePointerSaveIndex();
-  if (!BackChainIdx) {
-    // By definition, the frame address is the address of the back chain.
-    BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
-    FI->setFramePointerSaveIndex(BackChainIdx);
-  }
+  // By definition, the frame address is the address of the back chain.
+  auto *TFL =
+      static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
   SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
 
   // FIXME The frontend should detect this case.
@@ -3585,7 +3743,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
 
   // Get the known-zero mask for the operand.
   KnownBits Known = DAG.computeKnownBits(Op);
-  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
+  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
   if (NumSignificantBits == 0)
     return DAG.getConstant(0, DL, VT);
 
@@ -3861,6 +4019,9 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                               SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    report_fatal_error("Variable-sized stack allocations are not supported "
+                       "in GHC calling convention");
   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                             SystemZ::R15D, Op.getValueType());
 }
@@ -3871,6 +4032,10 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
 
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    report_fatal_error("Variable-sized stack allocations are not supported "
+                       "in GHC calling convention");
+
   SDValue Chain = Op.getOperand(0);
   SDValue NewSP = Op.getOperand(1);
   SDValue Backchain;
@@ -4935,6 +5100,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerSELECT_CC(Op, DAG);
   case ISD::SETCC:
     return lowerSETCC(Op, DAG);
+  case ISD::STRICT_FSETCC:
+    return lowerSTRICT_FSETCC(Op, DAG, false);
+  case ISD::STRICT_FSETCCS:
+    return lowerSTRICT_FSETCC(Op, DAG, true);
   case ISD::GlobalAddress:
     return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
   case ISD::GlobalTLSAddress:
@@ -5140,6 +5309,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(IABS);
     OPCODE(ICMP);
     OPCODE(FCMP);
+    OPCODE(STRICT_FCMP);
+    OPCODE(STRICT_FCMPS);
     OPCODE(TM);
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
@@ -5202,14 +5373,22 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VICMPHS);
     OPCODE(VICMPHLS);
     OPCODE(VFCMPE);
+    OPCODE(STRICT_VFCMPE);
+    OPCODE(STRICT_VFCMPES);
     OPCODE(VFCMPH);
+    OPCODE(STRICT_VFCMPH);
+    OPCODE(STRICT_VFCMPHS);
     OPCODE(VFCMPHE);
+    OPCODE(STRICT_VFCMPHE);
+    OPCODE(STRICT_VFCMPHES);
     OPCODE(VFCMPES);
     OPCODE(VFCMPHS);
    OPCODE(VFCMPHES);
    OPCODE(VFTCI);
    OPCODE(VEXTEND);
+    OPCODE(STRICT_VEXTEND);
    OPCODE(VROUND);
+    OPCODE(STRICT_VROUND);
    OPCODE(VTM);
    OPCODE(VFAE_CC);
    OPCODE(VFAEZ_CC);
@@ -5732,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
   return SDValue();
 }
 
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
+  SDValue Chain1 = N1->getOperand(0);
+  SDValue Chain2 = N2->getOperand(0);
+
+  // Trivial case: both nodes take the same chain.
+  if (Chain1 == Chain2)
+    return Chain1;
+
+  // FIXME - we could handle more complex cases via TokenFactor,
+  // assuming we can verify that this would not create a cycle.
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::combineFP_ROUND(
     SDNode *N, DAGCombinerInfo &DCI) const {
 
@@ -5744,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
   // (extract_vector_elt (VROUND X) 2)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f32 &&
       Op0.hasOneUse() &&
       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5761,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
-        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-            OtherRound.getOperand(0) == SDValue(U, 0) &&
+        if (OtherRound.getOpcode() == N->getOpcode() &&
+            OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
-          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-                                       MVT::v4f32, Vec);
+          SDValue VRound, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherRound.getNode());
+            if (!Chain)
+              continue;
+            VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+                                 {MVT::v4f32, MVT::Other}, {Chain, Vec});
+            Chain = VRound.getValue(1);
+          } else
+            VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                 MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound,
                        DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound,
                        DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
@@ -5795,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
   // (extract_vector_elt (VEXTEND X) 1)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f64 &&
       Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5812,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
-        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
-            OtherExtend.getOperand(0) == SDValue(U, 0) &&
+        if (OtherExtend.getOpcode() == N->getOpcode() &&
+            OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
-          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
-                                        MVT::v2f64, Vec);
+          SDValue VExtend, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherExtend.getNode());
+            if (!Chain)
+              continue;
+            VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+                                  {MVT::v2f64, MVT::Other}, {Chain, Vec});
+            Chain = VExtend.getValue(1);
+          } else
+            VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+                                  MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend,
                        DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend,
                        DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
@@ -6165,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+  case ISD::STRICT_FP_EXTEND:
   case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
   case ISD::BSWAP: return combineBSWAP(N, DCI);
   case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
@@ -7523,7 +7747,8 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
   // Replace pseudo with a normal load-and-test that models the def as
   // well.
   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
-    .addReg(SrcReg);
+    .addReg(SrcReg)
+    .setMIFlags(MI.getFlags());
   MI.eraseFromParent();
 
   return MBB;
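For context only (not part of the change): the new STRICT_FSETCC/STRICT_FSETCCS lowering is reached from the constrained comparison intrinsics, @llvm.experimental.constrained.fcmp and @llvm.experimental.constrained.fcmps. Below is a minimal, hypothetical C++ sketch of building such IR through the IRBuilder constrained-FP helpers; it assumes setIsFPConstrained() and CreateConstrainedFPCmp() are available in this LLVM version, and the function name cmp_lt is made up for illustration.

```cpp
// Hypothetical sketch (not from the patch): build a function whose body is a
// signaling constrained compare. When this IR is compiled for SystemZ with the
// strict-FP path enabled, the @llvm.experimental.constrained.fcmps call is
// selected as STRICT_FSETCCS and goes through lowerSTRICT_FSETCC() above.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("strict-fcmp-sketch", Ctx);
  IRBuilder<> B(Ctx);

  // i1 cmp_lt(double, double), marked strictfp so the backend must preserve
  // floating-point exception semantics.
  auto *DblTy = Type::getDoubleTy(Ctx);
  auto *FnTy = FunctionType::get(Type::getInt1Ty(Ctx), {DblTy, DblTy},
                                 /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "cmp_lt", M);
  F->addFnAttr(Attribute::StrictFP);

  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
  B.setIsFPConstrained(true); // emit constrained intrinsics, not plain fcmp

  auto AI = F->arg_begin();
  Value *LHS = &*AI++;
  Value *RHS = &*AI;

  // Signaling "ordered less than" compare.
  Value *Cmp = B.CreateConstrainedFPCmp(
      Intrinsic::experimental_constrained_fcmps, CmpInst::FCMP_OLT, LHS, RHS,
      "cmp");
  B.CreateRet(Cmp);

  verifyFunction(*F, &errs());
  M.print(outs(), nullptr); // feed the printed module to llc for s390x
  return 0;
}
```

Using the quiet intrinsic (experimental_constrained_fcmp) instead would take the STRICT_FSETCC path and select the STRICT_FCMP / STRICT_VFCMP* nodes added in this patch.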