diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 |
commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/SystemZ/SystemZISelLowering.cpp | |
parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) |
Notes
Diffstat (limited to 'lib/Target/SystemZ/SystemZISelLowering.cpp')
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 1149 |
1 file changed, 913 insertions, 236 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index adf368319dc3..302c7883f97b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -31,17 +31,6 @@ using namespace llvm; #define DEBUG_TYPE "systemz-lower" namespace { -// Represents a sequence for extracting a 0/1 value from an IPM result: -// (((X ^ XORValue) + AddValue) >> Bit) -struct IPMConversion { - IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) - : XORValue(xorValue), AddValue(addValue), Bit(bit) {} - - int64_t XORValue; - int64_t AddValue; - unsigned Bit; -}; - // Represents information about a comparison. struct Comparison { Comparison(SDValue Op0In, SDValue Op1In) @@ -87,7 +76,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0)); // Set up the register classes. if (Subtarget.hasHighWord()) @@ -133,6 +122,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Instructions are strings of 2-byte aligned 2-byte values. setMinFunctionAlignment(2); + // For performance reasons we prefer 16-byte alignment. + setPrefFunctionAlignment(4); // Handle operations that are handled in a similar way for all types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; @@ -173,6 +164,18 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, VT, Custom); setOperationAction(ISD::UDIVREM, VT, Custom); + // Support addition/subtraction with overflow. + setOperationAction(ISD::SADDO, VT, Custom); + setOperationAction(ISD::SSUBO, VT, Custom); + + // Support addition/subtraction with carry. 
+ setOperationAction(ISD::UADDO, VT, Custom); + setOperationAction(ISD::USUBO, VT, Custom); + + // Support carry in as value rather than glue. + setOperationAction(ISD::ADDCARRY, VT, Custom); + setOperationAction(ISD::SUBCARRY, VT, Custom); + // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and // stores, putting a serialization instruction after the stores. setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); @@ -517,7 +520,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VAEND, MVT::Other, Expand); // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); @@ -643,7 +648,8 @@ supportedAddressingMode(Instruction *I, bool HasVector) { if (SingleUser->getParent() == I->getParent()) { if (isa<ICmpInst>(SingleUser)) { if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) - if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())) + if (C->getBitWidth() <= 64 && + (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) // Comparison of memory with 16 bit signed / unsigned immediate return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); } else if (isa<StoreInst>(SingleUser)) @@ -748,6 +754,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { case 'f': // Floating-point register case 'h': // High-part register case 'r': // General-purpose register + case 'v': // Vector register return C_RegisterClass; case 'Q': // Memory with base and unsigned 12-bit displacement @@ -800,6 +807,12 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, weight = CW_Register; break; + case 'v': // Vector register + if ((type->isVectorTy() || type->isFloatingPointTy()) && + Subtarget.hasVector()) + weight = CW_Register; + 
break; + case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) if (isUInt<8>(C->getZExtValue())) @@ -838,13 +851,13 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, // Map maps 0-based register numbers to LLVM register numbers. static std::pair<unsigned, const TargetRegisterClass *> parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, - const unsigned *Map) { + const unsigned *Map, unsigned Size) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { unsigned Index; bool Failed = Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); - if (!Failed && Index < 16 && Map[Index]) + if (!Failed && Index < Size && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); @@ -881,6 +894,16 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( else if (VT == MVT::f128) return std::make_pair(0U, &SystemZ::FP128BitRegClass); return std::make_pair(0U, &SystemZ::FP32BitRegClass); + + case 'v': // Vector register + if (Subtarget.hasVector()) { + if (VT == MVT::f32) + return std::make_pair(0U, &SystemZ::VR32BitRegClass); + if (VT == MVT::f64) + return std::make_pair(0U, &SystemZ::VR64BitRegClass); + return std::make_pair(0U, &SystemZ::VR128BitRegClass); + } + break; } } if (Constraint.size() > 0 && Constraint[0] == '{') { @@ -891,22 +914,32 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( if (Constraint[1] == 'r') { if (VT == MVT::i32) return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, - SystemZMC::GR32Regs); + SystemZMC::GR32Regs, 16); if (VT == MVT::i128) return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, - SystemZMC::GR128Regs); + SystemZMC::GR128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, - SystemZMC::GR64Regs); + SystemZMC::GR64Regs, 16); } if (Constraint[1] == 'f') { if (VT == MVT::f32) return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, - 
SystemZMC::FP32Regs); + SystemZMC::FP32Regs, 16); if (VT == MVT::f128) return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, - SystemZMC::FP128Regs); + SystemZMC::FP128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, - SystemZMC::FP64Regs); + SystemZMC::FP64Regs, 16); + } + if (Constraint[1] == 'v') { + if (VT == MVT::f32) + return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, + SystemZMC::VR32Regs, 32); + if (VT == MVT::f64) + return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, + SystemZMC::VR64Regs, 32); + return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, + SystemZMC::VR128Regs, 32); } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); @@ -964,6 +997,13 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, #include "SystemZGenCallingConv.inc" +const MCPhysReg *SystemZTargetLowering::getScratchRegisters( + CallingConv::ID) const { + static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, + SystemZ::R14D, 0 }; + return ScratchRegs; +} + bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, Type *ToType) const { return isTruncateFree(FromType, ToType); @@ -1634,9 +1674,9 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { } } -// Emit an intrinsic with chain with a glued value instead of its CC result. -static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, - unsigned Opcode) { +// Emit an intrinsic with chain and an explicit CC register result. +static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { // Copy all operands except the intrinsic ID. 
unsigned NumOps = Op.getNumOperands(); SmallVector<SDValue, 6> Ops; @@ -1646,17 +1686,17 @@ static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, Ops.push_back(Op.getOperand(I)); assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); - SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); SDValue OldChain = SDValue(Op.getNode(), 1); - SDValue NewChain = SDValue(Intr.getNode(), 0); + SDValue NewChain = SDValue(Intr.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); - return Intr; + return Intr.getNode(); } -// Emit an intrinsic with a glued value instead of its CC result. -static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, - unsigned Opcode) { +// Emit an intrinsic with an explicit CC register result. +static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector<SDValue, 6> Ops; @@ -1664,11 +1704,8 @@ static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, for (unsigned I = 1; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); - if (Op->getNumValues() == 1) - return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); - assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); - SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); - return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); + return Intr.getNode(); } // CC is a comparison that will be implemented using an integer or @@ -1699,73 +1736,6 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) { #undef CONV } -// Return a sequence for getting a 1 from an IPM result when CC has a -// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. 
-// The handling of CC values outside CCValid doesn't matter. -static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { - // Deal with cases where the result can be taken directly from a bit - // of the IPM result. - if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC); - if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC + 1); - - // Deal with cases where we can add a value to force the sign bit - // to contain the right value. Putting the bit in 31 means we can - // use SRL rather than RISBG(L), and also makes it easier to get a - // 0/-1 value, so it has priority over the other tests below. - // - // These sequences rely on the fact that the upper two bits of the - // IPM result are zero. - uint64_t TopBit = uint64_t(1) << 31; - if (CCMask == (CCValid & SystemZ::CCMASK_0)) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) - return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_2))) - return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_3)) - return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_1 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); - - // Next try inverting the value and testing a bit. 0/1 could be - // handled this way too, but we dealt with that case above. - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) - return IPMConversion(-1, 0, SystemZ::IPM_CC); - - // Handle cases where adding a value forces a non-sign bit to contain - // the right value. 
- if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) - return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); - - // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are - // can be done by inverting the low CC bit and applying one of the - // sign-based extractions above. - if (CCMask == (CCValid & SystemZ::CCMASK_1)) - return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_2)) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (1 << SystemZ::IPM_CC), 31); - - llvm_unreachable("Unexpected CC combination"); -} - // If C can be converted to a comparison against zero, adjust the operands // as necessary. static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { @@ -2237,6 +2207,24 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, C.CCMask = NewCCMask; } +// See whether the comparison argument contains a redundant AND +// and remove it if so. This sometimes happens due to the generic +// BRCOND expansion. 
+static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Op0.getOpcode() != ISD::AND) + return; + auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); + if (!Mask) + return; + KnownBits Known; + DAG.computeKnownBits(C.Op0.getOperand(0), Known); + if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) + return; + + C.Op0 = C.Op0.getOperand(0); +} + // Return a Comparison that tests the condition-code result of intrinsic // node Call against constant integer CC using comparison code Cond. // Opcode is the opcode of the SystemZISD operation for the intrinsic @@ -2311,6 +2299,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustForRedundantAnd(DAG, DL, C); adjustZeroCmp(DAG, DL, C); adjustSubwordCmp(DAG, DL, C); adjustForSubtraction(DAG, DL, C); @@ -2330,29 +2319,28 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Emit the comparison instruction described by C. 
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (!C.Op1.getNode()) { - SDValue Op; + SDNode *Node; switch (C.Op0.getOpcode()) { case ISD::INTRINSIC_W_CHAIN: - Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); - break; + Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); + return SDValue(Node, 0); case ISD::INTRINSIC_WO_CHAIN: - Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); - break; + Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); + return SDValue(Node, Node->getNumValues() - 1); default: llvm_unreachable("Invalid comparison operands"); } - return SDValue(Op.getNode(), Op->getNumValues() - 1); } if (C.Opcode == SystemZISD::ICMP) - return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, + return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); - return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, + return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getConstant(RegisterOnly, DL, MVT::i32)); } - return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); + return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to @@ -2383,29 +2371,16 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } -// Return an i32 value that is 1 if the CC value produced by Glue is +// Return an i32 value that is 1 if the CC value produced by CCReg is // in the mask CCMask and 0 otherwise. CC is known to have a value // in CCValid, so other values can be ignored. 
-static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, +static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { - IPMConversion Conversion = getIPMConversion(CCValid, CCMask); - SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - - if (Conversion.XORValue) - Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); - - if (Conversion.AddValue) - Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); - - // The SHR/AND sequence should get optimized to an RISBG. - Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, DL, MVT::i32)); - if (Conversion.Bit != 31) - Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, DL, MVT::i32)); - return Result; + SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(CCValid, DL, MVT::i32), + DAG.getConstant(CCMask, DL, MVT::i32), CCReg }; + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot @@ -2554,8 +2529,8 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); - SDValue Glue = emitCmp(DAG, DL, C); - return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); + SDValue CCReg = emitCmp(DAG, DL, C); + return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { @@ -2566,10 +2541,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); - SDValue Glue = emitCmp(DAG, DL, C); + SDValue CCReg = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, 
Op.getValueType(), Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -2619,36 +2594,11 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); } - SDValue Glue = emitCmp(DAG, DL, C); - - // Special case for handling -1/0 results. The shifts we use here - // should get optimized with the IPM conversion sequence. - auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp); - auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp); - if (TrueC && FalseC) { - int64_t TrueVal = TrueC->getSExtValue(); - int64_t FalseVal = FalseC->getSExtValue(); - if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { - // Invert the condition if we want -1 on false. - if (TrueVal == 0) - C.CCMask ^= C.CCValid; - SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); - EVT VT = Op.getValueType(); - // Extend the result to VT. Upper bits are ignored. - if (!is32Bit(VT)) - Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); - // Sign-extend from the low bit. 
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); - return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); - } - } - + SDValue CCReg = emitCmp(DAG, DL, C); SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; + DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg}; - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -2757,7 +2707,7 @@ SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { - if (DAG.getTarget().Options.EmulatedTLS) + if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); @@ -3266,6 +3216,99 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +// Lower SADDO/SSUBO/UADDO/USUBO nodes. 
+SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, + SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc DL(N); + unsigned BaseOp = 0; + unsigned CCValid = 0; + unsigned CCMask = 0; + + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::SADDO: + BaseOp = SystemZISD::SADDO; + CCValid = SystemZ::CCMASK_ARITH; + CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; + break; + case ISD::SSUBO: + BaseOp = SystemZISD::SSUBO; + CCValid = SystemZ::CCMASK_ARITH; + CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; + break; + case ISD::UADDO: + BaseOp = SystemZISD::UADDO; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_CARRY; + break; + case ISD::USUBO: + BaseOp = SystemZISD::USUBO; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_BORROW; + break; + } + + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); + SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); + + SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); +} + +// Lower ADDCARRY/SUBCARRY nodes. +SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op, + SelectionDAG &DAG) const { + + SDNode *N = Op.getNode(); + MVT VT = N->getSimpleValueType(0); + + // Let legalize expand this if it isn't a legal type yet. 
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = Op.getOperand(2); + SDLoc DL(N); + unsigned BaseOp = 0; + unsigned CCValid = 0; + unsigned CCMask = 0; + + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown instruction!"); + case ISD::ADDCARRY: + BaseOp = SystemZISD::ADDCARRY; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_CARRY; + break; + case ISD::SUBCARRY: + BaseOp = SystemZISD::SUBCARRY; + CCValid = SystemZ::CCMASK_LOGICAL; + CCMask = SystemZ::CCMASK_LOGICAL_BORROW; + break; + } + + // Set the condition code from the carry flag. + Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, + DAG.getConstant(CCValid, DL, MVT::i32), + DAG.getConstant(CCMask, DL, MVT::i32)); + + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); + + SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); +} + SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -3512,16 +3555,16 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, EVT NarrowVT = Node->getMemoryVT(); EVT WideVT = NarrowVT == MVT::i64 ? 
MVT::i64 : MVT::i32; if (NarrowVT == WideVT) { - SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, DL, Tys, Ops, NarrowVT, MMO); - SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); - DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); return SDValue(); } @@ -3546,17 +3589,17 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. - SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); - SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); - DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); return SDValue(); } @@ -3613,12 +3656,10 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } -// Return an i32 that contains the value of CC immediately after After, -// whose final operand must be 
MVT::Glue. -static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { - SDLoc DL(After); - SDValue Glue = SDValue(After, After->getNumValues() - 1); - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); +// Convert condition code in CCReg to an i32 value. +static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { + SDLoc DL(CCReg); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); } @@ -3629,8 +3670,8 @@ SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, unsigned Opcode, CCValid; if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); - SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); - SDValue CC = getCCResult(DAG, Glued.getNode()); + SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, SDValue(Node, 0)); DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); return SDValue(); } @@ -3643,13 +3684,12 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCC(Op, Opcode, CCValid)) { - SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); - SDValue CC = getCCResult(DAG, Glued.getNode()); + SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); if (Op->getNumValues() == 1) - return CC; + return getCCResult(DAG, SDValue(Node, 0)); assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued, - CC); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -3853,20 +3893,34 @@ static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, return nullptr; } -// Convert the mask of the given 
VECTOR_SHUFFLE into a byte-level mask, +// Convert the mask of the given shuffle op into a byte-level mask, // as if it had type vNi8. -static void getVPermMask(ShuffleVectorSDNode *VSN, +static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl<int> &Bytes) { - EVT VT = VSN->getValueType(0); + EVT VT = ShuffleOp.getValueType(); unsigned NumElements = VT.getVectorNumElements(); unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); - Bytes.resize(NumElements * BytesPerElement, -1); - for (unsigned I = 0; I < NumElements; ++I) { - int Index = VSN->getMaskElt(I); - if (Index >= 0) + + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) { + int Index = VSN->getMaskElt(I); + if (Index >= 0) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + } + return true; + } + if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && + isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { + unsigned Index = ShuffleOp.getConstantOperandVal(1); + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) for (unsigned J = 0; J < BytesPerElement; ++J) Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + return true; } + return false; } // Bytes is a VPERM-like permute vector, except that -1 is used for @@ -4035,7 +4089,8 @@ bool GeneralShuffle::add(SDValue Op, unsigned Elem) { // See whether the bytes we need come from a contiguous part of one // operand. 
SmallVector<int, SystemZ::VectorBytes> OpBytes; - getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes); + if (!getVPermMask(Op, OpBytes)) + break; int NewByte; if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) break; @@ -4217,9 +4272,9 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { if (!Op.isUndef()) { uint64_t Value; if (Op.getOpcode() == ISD::Constant) - Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue(); + Value = cast<ConstantSDNode>(Op)->getZExtValue(); else if (Op.getOpcode() == ISD::ConstantFP) - Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() + Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() .getZExtValue()); else return false; @@ -4245,12 +4300,15 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, const SDLoc &DL, EVT VT, uint64_t Value, unsigned BitsPerElement) { // Signed 16-bit values can be replicated using VREPI. + // Mark the constants as opaque or DAGCombiner will convert back to + // BUILD_VECTOR. 
int64_t SignedValue = SignExtend64(Value, BitsPerElement); if (isInt<16>(SignedValue)) { MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, - DAG.getConstant(SignedValue, DL, MVT::i32)); + SDValue Op = DAG.getNode( + SystemZISD::REPLICATE, DL, VecVT, + DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } // See whether rotating the constant left some N places gives a value that @@ -4266,9 +4324,10 @@ static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, End -= 64 - BitsPerElement; MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, - DAG.getConstant(Start, DL, MVT::i32), - DAG.getConstant(End, DL, MVT::i32)); + SDValue Op = DAG.getNode( + SystemZISD::ROTATE_MASK, DL, VecVT, + DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), + DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } return SDValue(); @@ -4481,8 +4540,9 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, // priority over other methods below. 
uint64_t Mask = 0; if (tryBuildVectorByteMask(BVN, Mask)) { - SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(Mask, DL, MVT::i32)); + SDValue Op = DAG.getNode( + SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/)); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } @@ -4597,7 +4657,7 @@ SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, Op1.getOpcode() != ISD::BITCAST && Op1.getOpcode() != ISD::ConstantFP && Op2.getOpcode() == ISD::Constant) { - uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue(); + uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue(); unsigned Mask = VT.getVectorNumElements() - 1; if (Index <= Mask) return Op; @@ -4753,6 +4813,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSDIVREM(Op, DAG); case ISD::UDIVREM: return lowerUDIVREM(Op, DAG); + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + return lowerXALUO(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: + return lowerADDSUBCARRY(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); case ISD::CTPOP: @@ -4881,19 +4949,19 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { SDLoc DL(N); - SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), lowerI128ToGR128(DAG, N->getOperand(2)), lowerI128ToGR128(DAG, N->getOperand(3)) }; MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, DL, Tys, Ops, MVT::i128, MMO); - SDValue Success = emitSETCC(DAG, DL, Res.getValue(2), + SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); Results.push_back(lowerGR128ToI128(DAG, Res)); 
Results.push_back(Success); - Results.push_back(Res.getValue(1)); + Results.push_back(Res.getValue(2)); break; } default: @@ -4931,6 +4999,13 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UMUL_LOHI); OPCODE(SDIVREM); OPCODE(UDIVREM); + OPCODE(SADDO); + OPCODE(SSUBO); + OPCODE(UADDO); + OPCODE(USUBO); + OPCODE(ADDCARRY); + OPCODE(SUBCARRY); + OPCODE(GET_CCMASK); OPCODE(MVC); OPCODE(MVC_LOOP); OPCODE(NC); @@ -5049,13 +5124,14 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, if (Opcode == ISD::BITCAST) // Look through bitcasts. Op = Op.getOperand(0); - else if (Opcode == ISD::VECTOR_SHUFFLE && + else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && canTreatAsByteVector(Op.getValueType())) { // Get a VPERM-like permute mask and see whether the bytes covered // by the extracted element are a contiguous sequence from one // source operand. SmallVector<int, SystemZ::VectorBytes> Bytes; - getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes); + if (!getVPermMask(Op, Bytes)) + break; int First; if (!getShuffleInput(Bytes, Index * BytesPerElement, BytesPerElement, First)) @@ -5174,6 +5250,54 @@ SDValue SystemZTargetLowering::combineTruncateExtract( return SDValue(); } +SDValue SystemZTargetLowering::combineZERO_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { + auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (TrueOp && FalseOp) { + SDLoc DL(N0); + SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), + DAG.getConstant(FalseOp->getZExtValue(), DL, VT), + N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; + SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, 
Ops); + // If N0 has multiple uses, change other uses as well. + if (!N0.hasOneUse()) { + SDValue TruncSelect = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); + DCI.CombineTo(N0.getNode(), TruncSelect); + } + return NewSelect; + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) + // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) + // into (select_cc LHS, RHS, -1, 0, COND) + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) + N0 = N0.getOperand(0); + if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { + SDLoc DL(N0); + SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + return SDValue(); +} + SDValue SystemZTargetLowering::combineSIGN_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext (ashr (shl X, C1), C2)) to @@ -5249,7 +5373,7 @@ SDValue SystemZTargetLowering::combineSTORE( // for the extraction to be done on a vMiN value, so that we can use VSTE. // If X has wider elements then convert it to: // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). - if (MemVT.isInteger()) { + if (MemVT.isInteger() && SN->isTruncatingStore()) { if (SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { DCI.AddToWorklist(Value.getNode()); @@ -5261,9 +5385,7 @@ SDValue SystemZTargetLowering::combineSTORE( } } // Combine STORE (BSWAP) into STRVH/STRV/STRVG - // See comment in combineBSWAP about volatile accesses. 
if (!SN->isTruncatingStore() && - !SN->isVolatile() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && (Op1.getValueType() == MVT::i16 || @@ -5364,13 +5486,10 @@ SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BSWAP (LOAD) into LRVH/LRV/LRVG - // These loads are allowed to access memory multiple times, and so we must check - // that the loads are not volatile before performing the combine. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 || - N->getValueType(0) == MVT::i64) && - !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) { + N->getValueType(0) == MVT::i64)) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); @@ -5475,11 +5594,157 @@ SDValue SystemZTargetLowering::combineSHIFTROT( return SDValue(); } +static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { + // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code + // set by the CCReg instruction using the CCValid / CCMask masks, + // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing + // the condition code set by some other instruction, see whether we + // can directly use that condition code. + bool Invert = false; + + // Verify that we have an appropriate mask for a EQ or NE comparison. + if (CCValid != SystemZ::CCMASK_ICMP) + return false; + if (CCMask == SystemZ::CCMASK_CMP_NE) + Invert = !Invert; + else if (CCMask != SystemZ::CCMASK_CMP_EQ) + return false; + + // Verify that we have an ICMP that is the user of a SELECT_CCMASK. + SDNode *ICmp = CCReg.getNode(); + if (ICmp->getOpcode() != SystemZISD::ICMP) + return false; + SDNode *Select = ICmp->getOperand(0).getNode(); + if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + return false; + + // Verify that the ICMP compares against one of select values. 
+ auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); + if (!CompareVal) + return false; + auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); + if (!TrueVal) + return false; + auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); + if (!FalseVal) + return false; + if (CompareVal->getZExtValue() == FalseVal->getZExtValue()) + Invert = !Invert; + else if (CompareVal->getZExtValue() != TrueVal->getZExtValue()) + return false; + + // Compute the effective CC mask for the new branch or select. + auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); + auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); + if (!NewCCValid || !NewCCMask) + return false; + CCValid = NewCCValid->getZExtValue(); + CCMask = NewCCMask->getZExtValue(); + if (Invert) + CCMask ^= CCValid; + + // Return the updated CCReg link. + CCReg = Select->getOperand(4); + return true; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. + auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Chain = N->getOperand(0); + SDValue CCReg = N->getOperand(4); + + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), + Chain, + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(3), CCReg); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSELECT_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue CCReg = N->getOperand(4); + + if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), + N->getOperand(0), + N->getOperand(1), + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + CCReg); + return SDValue(); +} + + +SDValue SystemZTargetLowering::combineGET_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + + // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible + auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + + SDValue Select = N->getOperand(0); + if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + return SDValue(); + + auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); + auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); + if (!SelectCCValid || !SelectCCMask) + return SDValue(); + int SelectCCValidVal = SelectCCValid->getZExtValue(); + int SelectCCMaskVal = SelectCCMask->getZExtValue(); + + auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); + auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); + if (!TrueVal || !FalseVal) + return SDValue(); + if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0) + ; + else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0) + SelectCCMaskVal ^= SelectCCValidVal; + else + return SDValue(); + + if (SelectCCValidVal & ~CCValidVal) + return SDValue(); + if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) + return SDValue(); 
+ + return Select->getOperand(4); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { default: break; + case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); + case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); case SystemZISD::MERGE_HIGH: case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); @@ -5491,11 +5756,303 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRA: case ISD::SRL: case ISD::ROTL: return combineSHIFTROT(N, DCI); + case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); + case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); + case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); } return SDValue(); } +// Return the demanded elements for the OpNo source operand of Op. DemandedElts +// are for Op. +static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, + unsigned OpNo) { + EVT VT = Op.getValueType(); + unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1); + APInt SrcDemE; + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + // VECTOR PACK truncates the elements of two source vectors into one. 
+ SrcDemE = DemandedElts; + if (OpNo == 2) + SrcDemE.lshrInPlace(NumElts / 2); + SrcDemE = SrcDemE.trunc(NumElts / 2); + break; + // VECTOR UNPACK extends half the elements of the source vector. + case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + SrcDemE = APInt(NumElts * 2, 0); + SrcDemE.insertBits(DemandedElts, 0); + break; + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + SrcDemE = APInt(NumElts * 2, 0); + SrcDemE.insertBits(DemandedElts, NumElts); + break; + case Intrinsic::s390_vpdi: { + // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. + SrcDemE = APInt(NumElts, 0); + if (!DemandedElts[OpNo - 1]) + break; + unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned MaskBit = ((OpNo - 1) ? 1 : 4); + // Demand input element 0 or 1, given by the mask bit value. + SrcDemE.setBit((Mask & MaskBit)? 
1 : 0); + break; + } + case Intrinsic::s390_vsldb: { + // VECTOR SHIFT LEFT DOUBLE BY BYTE + assert(VT == MVT::v16i8 && "Unexpected type."); + unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); + unsigned NumSrc0Els = 16 - FirstIdx; + SrcDemE = APInt(NumElts, 0); + if (OpNo == 1) { + APInt DemEls = DemandedElts.trunc(NumSrc0Els); + SrcDemE.insertBits(DemEls, FirstIdx); + } else { + APInt DemEls = DemandedElts.lshr(NumSrc0Els); + SrcDemE.insertBits(DemEls, 0); + } + break; + } + case Intrinsic::s390_vperm: + SrcDemE = APInt(NumElts, 1); + break; + default: + llvm_unreachable("Unhandled intrinsic."); + break; + } + } else { + switch (Opcode) { + case SystemZISD::JOIN_DWORDS: + // Scalar operand. + SrcDemE = APInt(1, 1); + break; + case SystemZISD::SELECT_CCMASK: + SrcDemE = DemandedElts; + break; + default: + llvm_unreachable("Unhandled opcode."); + break; + } + } + return SrcDemE; +} + +static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth, + unsigned OpNo) { + APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); + APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); + unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); + KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth); + DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1); + DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1); + Known.Zero = LHSKnown.Zero & RHSKnown.Zero; + Known.One = LHSKnown.One & RHSKnown.One; +} + +void +SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + Known.resetAll(); + + // Intrinsic CC result is returned in the two low bits. 
+ unsigned tmp0, tmp1; // not used + if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { + Known.Zero.setBitsFrom(2); + return; + } + EVT VT = Op.getValueType(); + if (Op.getResNo() != 0 || VT == MVT::Untyped) + return; + assert (Known.getBitWidth() == VT.getScalarSizeInBits() && + "KnownBits does not match VT in bitwidth"); + assert ((!VT.isVector() || + (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && + "DemandedElts does not match VT number of elements"); + unsigned BitWidth = Known.getBitWidth(); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + bool IsLogical = false; + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + case Intrinsic::s390_vpdi: + case Intrinsic::s390_vsldb: + case Intrinsic::s390_vperm: + computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); + break; + case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + IsLogical = true; + LLVM_FALLTHROUGH; + case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: { + SDValue SrcOp = Op.getOperand(1); + unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits(); + Known = KnownBits(SrcBitWidth); + APInt SrcDemE = getDemandedSrcElements(Op, 
DemandedElts, 0); + DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1); + if (IsLogical) { + Known = Known.zext(BitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); + } else + Known = Known.sext(BitWidth); + break; + } + default: + break; + } + } else { + switch (Opcode) { + case SystemZISD::JOIN_DWORDS: + case SystemZISD::SELECT_CCMASK: + computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); + break; + case SystemZISD::REPLICATE: { + SDValue SrcOp = Op.getOperand(0); + DAG.computeKnownBits(SrcOp, Known, Depth + 1); + if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) + Known = Known.sext(BitWidth); // VREPI sign extends the immedate. + break; + } + default: + break; + } + } + + // Known has the width of the source operand(s). Adjust if needed to match + // the passed bitwidth. + if (Known.getBitWidth() != BitWidth) + Known = Known.zextOrTrunc(BitWidth); +} + +static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth, + unsigned OpNo) { + APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); + unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); + if (LHS == 1) return 1; // Early out. + APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); + unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); + if (RHS == 1) return 1; // Early out. 
+ unsigned Common = std::min(LHS, RHS); + unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getScalarSizeInBits(); + if (SrcBitWidth > VTBits) { // PACK + unsigned SrcExtraBits = SrcBitWidth - VTBits; + if (Common > SrcExtraBits) + return (Common - SrcExtraBits); + return 1; + } + assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); + return Common; +} + +unsigned +SystemZTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + unsigned Depth) const { + if (Op.getResNo() != 0) + return 1; + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::INTRINSIC_WO_CHAIN) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpksh: // PACKS + case Intrinsic::s390_vpksf: + case Intrinsic::s390_vpksg: + case Intrinsic::s390_vpkshs: // PACKS_CC + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + case Intrinsic::s390_vpklsh: // PACKLS + case Intrinsic::s390_vpklsf: + case Intrinsic::s390_vpklsg: + case Intrinsic::s390_vpklshs: // PACKLS_CC + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + case Intrinsic::s390_vpdi: + case Intrinsic::s390_vsldb: + case Intrinsic::s390_vperm: + return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); + case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: { + SDValue PackedOp = Op.getOperand(1); + APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); + unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getScalarSizeInBits(); + Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); + return Tmp; + } + default: + break; + } + } else { + switch (Opcode) { + case 
SystemZISD::SELECT_CCMASK: + return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); + default: + break; + } + } + + return 1; +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// @@ -5546,34 +6103,141 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base, return Reg; } +// The CC operand of MI might be missing a kill marker because there +// were multiple uses of CC, and ISel didn't know which to mark. +// Figure out whether MI should have had a kill marker. +static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { + // Scan forward through BB for a use/def of CC. + MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); + for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { + const MachineInstr& mi = *miI; + if (mi.readsRegister(SystemZ::CC)) + return false; + if (mi.definesRegister(SystemZ::CC)) + break; // Should have kill-flag - update below. + } + + // If we hit the end of the block, check whether CC is live into a + // successor. + if (miI == MBB->end()) { + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return false; + } + + return true; +} + +// Return true if it is OK for this Select pseudo-opcode to be cascaded +// together with other Select pseudo-opcodes into a single basic-block with +// a conditional jump around it. 
+static bool isSelectPseudo(MachineInstr &MI) { + switch (MI.getOpcode()) { + case SystemZ::Select32: + case SystemZ::Select64: + case SystemZ::SelectF32: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + case SystemZ::SelectVR32: + case SystemZ::SelectVR64: + case SystemZ::SelectVR128: + return true; + + default: + return false; + } +} + +// Helper function, which inserts PHI functions into SinkMBB: +// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], +// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects +// in [MIItBegin, MIItEnd) range. +static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, + MachineBasicBlock::iterator MIItEnd, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, + MachineBasicBlock *SinkMBB) { + MachineFunction *MF = TrueMBB->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + unsigned CCValid = MIItBegin->getOperand(3).getImm(); + unsigned CCMask = MIItBegin->getOperand(4).getImm(); + DebugLoc DL = MIItBegin->getDebugLoc(); + + MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); + + // As we are creating the PHIs, we have to be careful if there is more than + // one. Later Selects may reference the results of earlier Selects, but later + // PHIs have to reference the individual true/false inputs from earlier PHIs. + // That also means that PHI construction must work forward from earlier to + // later, and that the code must maintain a mapping from earlier PHI's + // destination registers, and the registers that went into the PHI. 
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; + + for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { + unsigned DestReg = MIIt->getOperand(0).getReg(); + unsigned TrueReg = MIIt->getOperand(1).getReg(); + unsigned FalseReg = MIIt->getOperand(2).getReg(); + + // If this Select we are generating is the opposite condition from + // the jump we generated, then we have to swap the operands for the + // PHI that is going to be generated. + if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) + std::swap(TrueReg, FalseReg); + + if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end()) + TrueReg = RegRewriteTable[TrueReg].first; + + if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end()) + FalseReg = RegRewriteTable[FalseReg].second; + + BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(TrueMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + // Add this PHI to the rewrite table. + RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); + } +} + // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, - MachineBasicBlock *MBB, - unsigned LOCROpcode) const { + MachineBasicBlock *MBB) const { const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned TrueReg = MI.getOperand(1).getReg(); - unsigned FalseReg = MI.getOperand(2).getReg(); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); DebugLoc DL = MI.getDebugLoc(); - // Use LOCROpcode if possible. 
- if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) { - BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg) - .addReg(FalseReg).addReg(TrueReg) - .addImm(CCValid).addImm(CCMask); - MI.eraseFromParent(); - return MBB; - } + // If we have a sequence of Select* pseudo instructions using the + // same condition code value, we want to expand all of them into + // a single pair of basic blocks using the same condition. + MachineInstr *LastMI = &MI; + MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + + if (isSelectPseudo(MI)) + while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) && + NextMIIt->getOperand(3).getImm() == CCValid && + (NextMIIt->getOperand(4).getImm() == CCMask || + NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) { + LastMI = &*NextMIIt; + ++NextMIIt; + } MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + // Unless CC was killed in the last Select instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB @@ -5592,11 +6256,12 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... 
MBB = JoinMBB; - BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) - .addReg(TrueReg).addMBB(StartMBB) - .addReg(FalseReg).addMBB(FalseMBB); + MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator MIItEnd = + std::next(MachineBasicBlock::iterator(LastMI)); + createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB); - MI.eraseFromParent(); + StartMBB->erase(MIItBegin, MIItEnd); return JoinMBB; } @@ -5658,6 +6323,13 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + // Unless CC was killed in the CondStore instruction, mark it as + // live-in to both FalseMBB and JoinMBB. + if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) { + FalseMBB->addLiveIn(SystemZ::CC); + JoinMBB->addLiveIn(SystemZ::CC); + } + // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB @@ -6223,6 +6895,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( DestBase = MachineOperand::CreateReg(NextDestReg, false); SrcBase = MachineOperand::CreateReg(NextSrcReg, false); Length &= 255; + if (EndMBB && !Length) + // If the loop handled the whole CLC range, DoneMBB will be empty with + // CC live-through into EndMBB, so add it as live-in. + DoneMBB->addLiveIn(SystemZ::CC); MBB = DoneMBB; } // Handle any remaining bytes with straight-line code. @@ -6415,18 +7091,15 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { - case SystemZ::Select32Mux: - return emitSelect(MI, MBB, - Subtarget.hasLoadStoreOnCond2()? 
SystemZ::LOCRMux : 0); case SystemZ::Select32: - return emitSelect(MI, MBB, SystemZ::LOCR); case SystemZ::Select64: - return emitSelect(MI, MBB, SystemZ::LOCGR); case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: + case SystemZ::SelectVR32: + case SystemZ::SelectVR64: case SystemZ::SelectVR128: - return emitSelect(MI, MBB, 0); + return emitSelect(MI, MBB); case SystemZ::CondStore8Mux: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); @@ -6675,6 +7348,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::LTXBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + case TargetOpcode::STACKMAP: + case TargetOpcode::PATCHPOINT: + return emitPatchPoint(MI, MBB); + default: llvm_unreachable("Unexpected instr type to insert"); } |