Diffstat (limited to 'lib/Target/SystemZ/SystemZISelLowering.cpp')
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp  1043
1 file changed, 673 insertions, 370 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index b0a6127646364..14991bbbd3653 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -184,8 +184,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
 
       // No special instructions for these.
       setOperationAction(ISD::CTTZ, VT, Expand);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
-      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::ROTR, VT, Expand);
 
       // Use *MUL_LOHI where possible instead of MULH*.
@@ -216,6 +214,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
 
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  // Traps are legal, as we will convert them to "j .+2".
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
   // z10 has instructions for signed but not unsigned FP conversion.
   // Handle unsigned 32-bit types as signed 64-bit types.
   if (!Subtarget.hasFPExtension()) {
@@ -253,6 +256,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   // We need to handle dynamic allocations specially because of the
   // 160-byte area at the bottom of the stack.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
 
   // Use custom expanders so that we can force the function to use
   // a frame pointer.
@@ -310,8 +314,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::CTPOP, VT, Custom);
       setOperationAction(ISD::CTTZ, VT, Legal);
       setOperationAction(ISD::CTLZ, VT, Legal);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
-      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
 
       // Convert a GPR scalar to a vector by inserting it into element 0.
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
@@ -437,6 +439,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::SHL);
+  setTargetDAGCombine(ISD::SRA);
+  setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::ROTL);
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -799,7 +806,7 @@ static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
 // Value is a value that has been passed to us in the location described by VA
 // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
 // any loads onto Chain.
-static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                    CCValAssign &VA, SDValue Chain,
                                    SDValue Value) {
   // If the argument has been promoted from a smaller type, insert an
@@ -813,16 +820,12 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
   if (VA.isExtInLoc())
     Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
-  else if (VA.getLocInfo() == CCValAssign::Indirect)
-    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
-                        MachinePointerInfo(), false, false, false, 0);
   else if (VA.getLocInfo() == CCValAssign::BCvt) {
     // If this is a short vector argument loaded from the stack,
     // extend from i64 to full vector size and then bitcast.
     assert(VA.getLocVT() == MVT::i64);
     assert(VA.getValVT().isVector());
-    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
-                        Value, DAG.getUNDEF(MVT::i64));
+    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
     Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
   } else
     assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
@@ -832,7 +835,7 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
 // Value is a value of type VA.getValVT() that we need to copy into
 // the location described by VA.  Return a copy of Value converted to
 // VA.getValVT().  The caller is responsible for handling indirect values.
-static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                    CCValAssign &VA, SDValue Value) {
   switch (VA.getLocInfo()) {
   case CCValAssign::SExt:
@@ -856,11 +859,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
   }
 }
 
-SDValue SystemZTargetLowering::
-LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
-                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                     SDLoc DL, SelectionDAG &DAG,
-                     SmallVectorImpl<SDValue> &InVals) const {
+SDValue SystemZTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -868,6 +870,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     MF.getInfo<SystemZMachineFunctionInfo>();
   auto *TFL =
       static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
   // Detect unsupported vector argument types.
   if (Subtarget.hasVector())
@@ -930,19 +933,34 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
       // Create the SelectionDAG nodes corresponding to a load
       // from this parameter.  Unpromoted ints and floats are
       // passed as right-justified 8-byte values.
-      EVT PtrVT = getPointerTy(DAG.getDataLayout());
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
         FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                           DAG.getIntPtrConstant(4, DL));
       ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
-                             MachinePointerInfo::getFixedStack(MF, FI), false,
-                             false, false, 0);
+                             MachinePointerInfo::getFixedStack(MF, FI));
     }
 
     // Convert the value of the argument register into the value that's
     // being passed.
-    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+    if (VA.getLocInfo() == CCValAssign::Indirect) {
+      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
+                                   MachinePointerInfo()));
+      // If the original argument was split (e.g. i128), we need
+      // to load all parts of it here (using the same address).
+      unsigned ArgIndex = Ins[I].OrigArgIndex;
+      assert (Ins[I].PartOffset == 0);
+      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
+        CCValAssign &PartVA = ArgLocs[I + 1];
+        unsigned PartOffset = Ins[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
+                                     MachinePointerInfo()));
+        ++I;
+      }
+    } else
+      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
   }
 
   if (IsVarArg) {
@@ -973,8 +991,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                                      &SystemZ::FP64BitRegClass);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
       MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
-                               MachinePointerInfo::getFixedStack(MF, FI),
-                               false, false, 0);
+                               MachinePointerInfo::getFixedStack(MF, FI));
     }
     // Join the stores, which are independent of one another.
     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -987,9 +1004,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 }
 
 static bool canUseSiblingCall(const CCState &ArgCCInfo,
-                              SmallVectorImpl<CCValAssign> &ArgLocs) {
+                              SmallVectorImpl<CCValAssign> &ArgLocs,
+                              SmallVectorImpl<ISD::OutputArg> &Outs) {
   // Punt if there are any indirect or stack arguments, or if the call
-  // needs the call-saved argument register R6.
+  // needs the callee-saved argument register R6, or if the call uses
+  // the callee-saved register arguments SwiftSelf and SwiftError.
   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
     CCValAssign &VA = ArgLocs[I];
     if (VA.getLocInfo() == CCValAssign::Indirect)
@@ -999,6 +1018,8 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
     unsigned Reg = VA.getLocReg();
     if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
       return false;
+    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
+      return false;
   }
   return true;
 }
@@ -1032,7 +1053,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // We don't support GuaranteedTailCallOpt, only automatically-detected
   // sibling calls.
-  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
     IsTailCall = false;
 
   // Get a count of how many bytes are to be pushed on the stack.
@@ -1054,11 +1075,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-      MemOpChains.push_back(DAG.getStore(
-          Chain, DL, ArgValue, SpillSlot,
-          MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                       MachinePointerInfo::getFixedStack(MF, FI)));
+      // If the original argument was split (e.g. i128), we need
+      // to store all parts of it here (and pass just one address).
+      unsigned ArgIndex = Outs[I].OrigArgIndex;
+      assert (Outs[I].PartOffset == 0);
+      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
+        SDValue PartValue = OutVals[I + 1];
+        unsigned PartOffset = Outs[I + 1].PartOffset;
+        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
+                                      DAG.getIntPtrConstant(PartOffset, DL));
+        MemOpChains.push_back(
+            DAG.getStore(Chain, DL, PartValue, Address,
+                         MachinePointerInfo::getFixedStack(MF, FI)));
+        ++I;
+      }
       ArgValue = SpillSlot;
     } else
       ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@@ -1080,9 +1115,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                             DAG.getIntPtrConstant(Offset, DL));
 
       // Emit the store.
-      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
-                                         MachinePointerInfo(),
-                                         false, false, 0));
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
     }
   }
 
@@ -1180,17 +1214,23 @@ CanLowerReturn(CallingConv::ID CallConv,
   if (Subtarget.hasVector())
     VerifyVectorTypes(Outs);
 
+  // Special case that we cannot easily detect in RetCC_SystemZ since
+  // i128 is not a legal type.
+  for (auto &Out : Outs)
+    if (Out.ArgVT == MVT::i128)
+      return false;
+
   SmallVector<CCValAssign, 16> RetLocs;
   CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
   return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
 }
 
 SDValue
-SystemZTargetLowering::LowerReturn(SDValue Chain,
-                                   CallingConv::ID CallConv, bool IsVarArg,
+SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                                   bool IsVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
-                                   SDLoc DL, SelectionDAG &DAG) const {
+                                   const SDLoc &DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
   // Detect unsupported vector return types.
@@ -1235,8 +1275,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
   return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
 }
 
-SDValue SystemZTargetLowering::
-prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
+SDValue SystemZTargetLowering::prepareVolatileOrAtomicLoad(
+    SDValue Chain, const SDLoc &DL, SelectionDAG &DAG) const {
   return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
 }
 
@@ -1399,6 +1439,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
     CCValid = SystemZ::CCMASK_VCMP;
     return true;
 
+  case Intrinsic::s390_tdc:
+    Opcode = SystemZISD::TDC;
+    CCValid = SystemZ::CCMASK_TDC;
+    return true;
+
   default:
     return false;
   }
@@ -1538,7 +1583,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
 
 // If C can be converted to a comparison against zero, adjust the operands
 // as necessary.
-static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
   if (C.ICmpType == SystemZICMP::UnsignedOnly)
     return;
 
@@ -1558,7 +1603,8 @@ static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
 
 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
 // adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
+                             Comparison &C) {
   // For us to make any changes, it must be a comparison between a single-use
   // load and a constant.
   if (!C.Op0.hasOneUse() ||
@@ -1614,11 +1660,10 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
                                               ISD::ZEXTLOAD);
   if (C.Op0.getValueType() != MVT::i32 ||
       Load->getExtensionType() != ExtType)
-    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
-                           Load->getChain(), Load->getBasePtr(),
-                           Load->getPointerInfo(), Load->getMemoryVT(),
-                           Load->isVolatile(), Load->isNonTemporal(),
-                           Load->isInvariant(), Load->getAlignment());
+    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
+                           Load->getBasePtr(), Load->getPointerInfo(),
+                           Load->getMemoryVT(), Load->getAlignment(),
+                           Load->getMemOperand()->getFlags());
 
   // Make sure that the second operand is an i32 with the right value.
   if (C.Op1.getValueType() != MVT::i32 ||
@@ -1719,7 +1764,8 @@ static unsigned reverseCCMask(unsigned CCMask) {
 // Check whether C tests for equality between X and Y and whether X - Y
 // or Y - X is also computed.  In that case it's better to compare the
 // result of the subtraction against zero.
-static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
+                                 Comparison &C) {
   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
       C.CCMask == SystemZ::CCMASK_CMP_NE) {
     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -1784,7 +1830,8 @@ static void adjustForLTGFR(Comparison &C) {
 // If C compares the truncation of an extending load, try to compare
 // the untruncated value instead.  This exposes more opportunities to
 // reuse CC.
-static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
+                               Comparison &C) {
   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
       C.Op1.getOpcode() == ISD::Constant &&
@@ -1915,7 +1962,8 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
 
 // See whether C can be implemented as a TEST UNDER MASK instruction.
 // Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
+                                   Comparison &C) {
   // Check that we have a comparison with a constant.
   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
   if (!ConstOp1)
@@ -2036,7 +2084,7 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
 
 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
-                         ISD::CondCode Cond, SDLoc DL) {
+                         ISD::CondCode Cond, const SDLoc &DL) {
   if (CmpOp1.getOpcode() == ISD::Constant) {
     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
     unsigned Opcode, CCValid;
@@ -2089,7 +2137,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
 }
 
 // Emit the comparison instruction described by C.
-static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
   if (!C.Op1.getNode()) {
     SDValue Op;
     switch (C.Op0.getOpcode()) {
@@ -2119,9 +2167,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
 
 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
 // 64 bits.  Extend is the extension type to use.  Store the high part
 // in Hi and the low part in Lo.
-static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
-                            unsigned Extend, SDValue Op0, SDValue Op1,
-                            SDValue &Hi, SDValue &Lo) {
+static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
+                            SDValue Op0, SDValue Op1, SDValue &Hi,
+                            SDValue &Lo) {
   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
@@ -2136,10 +2184,9 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
 // on the extended Op0 and (unextended) Op1.  Store the even register result
 // in Even and the odd register result in Odd.
-static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
-                             unsigned Extend, unsigned Opcode,
-                             SDValue Op0, SDValue Op1,
-                             SDValue &Even, SDValue &Odd) {
+static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+                             unsigned Extend, unsigned Opcode, SDValue Op0,
+                             SDValue Op1, SDValue &Even, SDValue &Odd) {
   SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
                                SDValue(In128, 0), Op1);
@@ -2151,7 +2198,7 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
 // Return an i32 value that is 1 if the CC value produced by Glue is
 // in the mask CCMask and 0 otherwise.  CC is known to have a value
 // in CCValid, so other values can be ignored.
-static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
+static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
                          unsigned CCValid, unsigned CCMask) {
   IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
   SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
@@ -2220,7 +2267,7 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
 
 // Return a v2f64 that contains the extended form of elements Start and Start+1
 // of v4f32 value Op.
-static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                   SDValue Op) {
   int Mask[] = { Start, -1, Start + 1, -1 };
   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
@@ -2229,7 +2276,7 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
 
 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
 // producing a result of type VT.
-static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
                             EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
   // There is no hardware support for v4f32, so extend the vector into
   // two v2f64s and compare those.
@@ -2247,7 +2294,7 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
 
 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
 // an integer mask of type VT.
-static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 ISD::CondCode CC, SDValue CmpOp0,
                                 SDValue CmpOp1) {
   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
@@ -2342,7 +2389,7 @@ static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
 }
 
 // Return the absolute or negative absolute of Op; IsNegative decides which.
-static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
+static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                            bool IsNegative) {
   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
   if (IsNegative)
@@ -2414,11 +2461,10 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
   const GlobalValue *GV = Node->getGlobal();
   int64_t Offset = Node->getOffset();
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  Reloc::Model RM = DAG.getTarget().getRelocationModel();
   CodeModel::Model CM = DAG.getTarget().getCodeModel();
 
   SDValue Result;
-  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
+  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
     // Assign anchors at 1<<12 byte boundaries.
     uint64_t Anchor = Offset & ~uint64_t(0xfff);
     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
@@ -2435,8 +2481,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
-                         MachinePointerInfo::getGOT(DAG.getMachineFunction()),
-                         false, false, false, 0);
+                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
   }
 
   // If there was a non-zero offset that we didn't fold, create an explicit
@@ -2495,14 +2540,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
   return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
 }
 
-SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
-                                                     SelectionDAG &DAG) const {
-  if (DAG.getTarget().Options.EmulatedTLS)
-    return LowerToTLSEmulatedModel(Node, DAG);
-  SDLoc DL(Node);
-  const GlobalValue *GV = Node->getGlobal();
+SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
+                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
 
   // The high part of the thread pointer is in access register 0.
   SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
@@ -2517,7 +2557,19 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
   // Merge them into a single 64-bit address.
   SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                     DAG.getConstant(32, DL, PtrVT));
-  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+                                                     SelectionDAG &DAG) const {
+  if (DAG.getTarget().Options.EmulatedTLS)
+    return LowerToTLSEmulatedModel(Node, DAG);
+  SDLoc DL(Node);
+  const GlobalValue *GV = Node->getGlobal();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+
+  SDValue TP = lowerThreadPointer(DL, DAG);
 
   // Get the offset of GA from the thread pointer, based on the TLS model.
   SDValue Offset;
@@ -2530,8 +2582,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
     Offset = DAG.getConstantPool(CPV, PtrVT, 8);
     Offset = DAG.getLoad(
         PtrVT, DL, DAG.getEntryNode(), Offset,
-        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-        false, false, 0);
+        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
     // Call __tls_get_offset to retrieve the offset.
     Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
@@ -2546,8 +2597,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
     Offset = DAG.getConstantPool(CPV, PtrVT, 8);
     Offset = DAG.getLoad(
         PtrVT, DL, DAG.getEntryNode(), Offset,
-        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-        false, false, 0);
+        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
     // Call __tls_get_offset to retrieve the module base offset.
     Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
@@ -2565,8 +2615,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
     SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
     DTPOffset = DAG.getLoad(
         PtrVT, DL, DAG.getEntryNode(), DTPOffset,
-        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-        false, false, 0);
+        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
 
     Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
     break;
@@ -2577,9 +2626,9 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
     Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                         SystemZII::MO_INDNTPOFF);
     Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
-    Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
-                         MachinePointerInfo::getGOT(DAG.getMachineFunction()),
-                         false, false, false, 0);
+    Offset =
+        DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+                    MachinePointerInfo::getGOT(DAG.getMachineFunction()));
     break;
   }
 
@@ -2591,8 +2640,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
     Offset = DAG.getConstantPool(CPV, PtrVT, 8);
     Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
-        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
-        false, false, 0);
+        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
     break;
   }
   }
@@ -2640,6 +2688,57 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
 }
 
+SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+
+  SDLoc DL(Op);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // If the back chain frame index has not been allocated yet, do so.
+  SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
+  int BackChainIdx = FI->getFramePointerSaveIndex();
+  if (!BackChainIdx) {
+    // By definition, the frame address is the address of the back chain.
+    BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+    FI->setFramePointerSaveIndex(BackChainIdx);
+  }
+  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
+
+  // FIXME The frontend should detect this case.
+  if (Depth > 0) {
+    report_fatal_error("Unsupported stack frame traversal count");
+  }
+
+  return BackChain;
+}
+
+SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  SDLoc DL(Op);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // FIXME The frontend should detect this case.
+  if (Depth > 0) {
+    report_fatal_error("Unsupported stack frame traversal count");
+  }
+
+  // Return R14D, which has the return address.  Mark it an implicit live-in.
+  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
+}
+
 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -2715,8 +2814,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
     FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                             DAG.getIntPtrConstant(Offset, DL));
     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
-                             MachinePointerInfo(SV, Offset),
-                             false, false, 0);
+                             MachinePointerInfo(SV, Offset));
     Offset += 8;
   }
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
@@ -2740,8 +2838,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
 SDValue SystemZTargetLowering::
 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
-  bool RealignOpt = !DAG.getMachineFunction().getFunction()->
-    hasFnAttribute("no-realign-stack");
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool RealignOpt = !MF.getFunction()->hasFnAttribute("no-realign-stack");
+  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
 
   SDValue Chain = Op.getOperand(0);
   SDValue Size = Op.getOperand(1);
@@ -2763,10 +2862,15 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   // Get a reference to the stack pointer.
   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
 
+  // If we need a backchain, save it now.
+  SDValue Backchain;
+  if (StoreBackchain)
+    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+
   // Add extra space for alignment if needed.
   if (ExtraAlignSpace)
     NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
-                             DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
 
   // Get the new stack pointer value.
   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
@@ -2790,10 +2894,20 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
                          DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
   }
 
+  if (StoreBackchain)
+    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
   SDValue Ops[2] = { Result, Chain };
   return DAG.getMergeValues(Ops, DL);
 }
 
+SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
+    SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+}
+
 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -3031,6 +3145,27 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
   return Op;
 }
 
+SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+    cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
+    cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // The only fence that needs an instruction is a sequentially-consistent
+  // cross-thread fence.
+  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+      FenceScope == CrossThread) {
+    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
+                                      Op.getOperand(0)),
+                   0);
+  }
+
+  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
 // Op is an atomic load.  Lower it into a normal volatile load.
 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                 SelectionDAG &DAG) const {
@@ -3220,8 +3355,24 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
-  return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
-                          SystemZ::R15D, Op.getOperand(1));
+  bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain");
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue NewSP = Op.getOperand(1);
+  SDValue Backchain;
+  SDLoc DL(Op);
+
+  if (StoreBackchain) {
+    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+    Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
+  }
+
+  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+
+  if (StoreBackchain)
+    Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
+
+  return Chain;
 }
 
 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
@@ -3286,6 +3437,9 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   switch (Id) {
+  case Intrinsic::thread_pointer:
+    return lowerThreadPointer(SDLoc(Op), DAG);
+
   case Intrinsic::s390_vpdi:
     return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -3553,7 +3707,7 @@ static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
 
 // Create a node that performs P on operands Op0 and Op1, casting the
 // operands to the appropriate type.  The type of the result is determined by P.
-static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
+static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                               const Permute &P, SDValue Op0, SDValue Op1) {
   // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
   // elements of a PACK are twice as wide as the outputs.
@@ -3582,7 +3736,8 @@ static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
 // Bytes is a VPERM-like permute vector, except that -1 is used for
 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
 // VSLDI or VPERM.
-static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
+                                     SDValue *Ops,
                                      const SmallVectorImpl<int> &Bytes) {
   for (unsigned I = 0; I < 2; ++I)
     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
@@ -3600,7 +3755,7 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
     else
       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
-  SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
+  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
 }
 
@@ -3610,7 +3765,7 @@ struct GeneralShuffle {
   GeneralShuffle(EVT vt) : VT(vt) {}
   void addUndef();
   void add(SDValue, unsigned);
-  SDValue getNode(SelectionDAG &, SDLoc);
+  SDValue getNode(SelectionDAG &, const SDLoc &);
 
   // The operands of the shuffle.
   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -3667,7 +3822,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
     }
     Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
     Byte = unsigned(NewByte) % SystemZ::VectorBytes;
-  } else if (Op.getOpcode() == ISD::UNDEF) {
+  } else if (Op.isUndef()) {
    addUndef();
     return;
   } else
@@ -3689,7 +3844,7 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
 }
 
 // Return SDNodes for the completed shuffle.
-SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
   assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
 
   if (Ops.size() == 0)
@@ -3770,37 +3925,37 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
 static bool isScalarToVector(SDValue Op) {
   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
-    if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
+    if (!Op.getOperand(I).isUndef())
      return false;
   return true;
 }
 
 // Return a vector of type VT that contains Value in the first element.
 // The other elements don't matter.
-static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                    SDValue Value) {
   // If we have a constant, replicate it to all elements and let the
   // BUILD_VECTOR lowering take care of it.
   if (Value.getOpcode() == ISD::Constant ||
       Value.getOpcode() == ISD::ConstantFP) {
     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
-    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+    return DAG.getBuildVector(VT, DL, Ops);
   }
-  if (Value.getOpcode() == ISD::UNDEF)
+  if (Value.isUndef())
     return DAG.getUNDEF(VT);
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
 }
 
 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
 // element 1.  Used for cases in which replication is cheap.
-static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue Op0, SDValue Op1) {
-  if (Op0.getOpcode() == ISD::UNDEF) {
-    if (Op1.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef()) {
+    if (Op1.isUndef())
       return DAG.getUNDEF(VT);
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
   }
-  if (Op1.getOpcode() == ISD::UNDEF)
+  if (Op1.isUndef())
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                      buildScalarToVector(DAG, DL, VT, Op0),
@@ -3809,15 +3964,15 @@ static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
 
 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
 // vector for them.
-static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
+static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                           SDValue Op1) {
-  if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef() && Op1.isUndef())
     return DAG.getUNDEF(MVT::v2i64);
   // If one of the two inputs is undefined then replicate the other one,
   // in order to avoid using another register unnecessarily.
-  if (Op0.getOpcode() == ISD::UNDEF)
+  if (Op0.isUndef())
     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
-  else if (Op1.getOpcode() == ISD::UNDEF)
+  else if (Op1.isUndef())
     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
   else {
     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
@@ -3834,7 +3989,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
   unsigned BytesPerElement = ElemVT.getStoreSize();
   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
     SDValue Op = BVN->getOperand(I);
-    if (Op.getOpcode() != ISD::UNDEF) {
+    if (!Op.isUndef()) {
       uint64_t Value;
       if (Op.getOpcode() == ISD::Constant)
         Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
@@ -3862,7 +4017,7 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
 // an empty value.
 static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
                                        const SystemZInstrInfo *TII,
-                                       SDLoc DL, EVT VT, uint64_t Value,
+                                       const SDLoc &DL, EVT VT, uint64_t Value,
                                        unsigned BitsPerElement) {
   // Signed 16-bit values can be replicated using VREPI.
   int64_t SignedValue = SignExtend64(Value, BitsPerElement);
@@ -3919,7 +4074,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
       unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
       GS.add(Op.getOperand(0), Elem);
       FoundOne = true;
-    } else if (Op.getOpcode() == ISD::UNDEF) {
+    } else if (Op.isUndef()) {
       GS.addUndef();
     } else {
       GS.add(SDValue(), ResidueOps.size());
@@ -3937,7 +4092,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
     ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
     for (auto &Op : GS.Ops) {
       if (!Op.getNode()) {
-        Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
+        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
       }
     }
@@ -3946,14 +4101,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
 }
 
 // Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                            SmallVectorImpl<SDValue> &Elems) {
   // See whether there is a single replicated value.
   SDValue Single;
   unsigned int NumElements = Elems.size();
   unsigned int Count = 0;
   for (auto Elem : Elems) {
-    if (Elem.getOpcode() != ISD::UNDEF) {
+    if (!Elem.isUndef()) {
       if (!Single.getNode())
         Single = Elem;
       else if (Elem != Single) {
@@ -3998,9 +4153,9 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
     // Avoid unnecessary undefs by reusing the other operand.
-    if (Op01.getOpcode() == ISD::UNDEF)
+    if (Op01.isUndef())
       Op01 = Op23;
-    else if (Op23.getOpcode() == ISD::UNDEF)
+    else if (Op23.isUndef())
       Op23 = Op01;
     // Merging identical replications is a no-op.
     if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
@@ -4034,7 +4189,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     for (unsigned I = 0; I < NumElements; ++I)
       if (!Constants[I].getNode())
         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
-    Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
+    Result = DAG.getBuildVector(VT, DL, Constants);
   } else {
     // Otherwise try to use VLVGP to start the sequence in order to
     // avoid a false dependency on any previous contents of the vector
@@ -4042,8 +4197,8 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
     // is defined.
     unsigned I1 = NumElements / 2 - 1;
     unsigned I2 = NumElements - 1;
-    bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
-    bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
+    bool Def1 = !Elems[I1].isUndef();
+    bool Def2 = !Elems[I2].isUndef();
     if (Def1 || Def2) {
       SDValue Elem1 = Elems[Def1 ? I1 : I2];
       SDValue Elem2 = Elems[Def2 ? I2 : I1];
@@ -4057,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
 
     // Use VLVGx to insert the other elements.
     for (unsigned I = 0; I < NumElements; ++I)
-      if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
+      if (!Done[I] && !Elems[I].isUndef())
         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                              DAG.getConstant(I, DL, MVT::i32));
     return Result;
@@ -4120,8 +4275,7 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   // See if we should use shuffles to construct the vector from other vectors.
-  SDValue Res = tryBuildVectorShuffle(DAG, BVN);
-  if (Res.getNode())
+  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
     return Res;
 
   // Detect SCALAR_TO_VECTOR conversions.
@@ -4312,6 +4466,10 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
+  case ISD::FRAMEADDR:
+    return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return lowerRETURNADDR(Op, DAG);
   case ISD::BR_CC:
     return lowerBR_CC(Op, DAG);
   case ISD::SELECT_CC:
@@ -4336,6 +4494,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerVACOPY(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return lowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::GET_DYNAMIC_AREA_OFFSET:
+    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
   case ISD::SMUL_LOHI:
     return lowerSMUL_LOHI(Op, DAG);
   case ISD::UMUL_LOHI:
@@ -4348,12 +4508,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerOR(Op, DAG);
   case ISD::CTPOP:
     return lowerCTPOP(Op, DAG);
-  case ISD::CTLZ_ZERO_UNDEF:
-    return DAG.getNode(ISD::CTLZ, SDLoc(Op),
-                       Op.getValueType(), Op.getOperand(0));
-  case ISD::CTTZ_ZERO_UNDEF:
-    return DAG.getNode(ISD::CTTZ, SDLoc(Op),
-                       Op.getValueType(), Op.getOperand(0));
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::ATOMIC_SWAP:
     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
   case ISD::ATOMIC_STORE:
@@ -4457,6 +4613,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(SEARCH_STRING);
     OPCODE(IPM);
     OPCODE(SERIALIZE);
+    OPCODE(MEMBARRIER);
     OPCODE(TBEGIN);
     OPCODE(TBEGIN_NOFLOAT);
     OPCODE(TEND);
@@ -4506,6 +4663,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VISTR_CC);
     OPCODE(VSTRC_CC);
     OPCODE(VSTRCZ_CC);
+    OPCODE(TDC);
     OPCODE(ATOMIC_SWAPW);
     OPCODE(ATOMIC_LOADW_ADD);
     OPCODE(ATOMIC_LOADW_SUB);
@@ -4518,6 +4676,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(ATOMIC_LOADW_UMIN);
     OPCODE(ATOMIC_LOADW_UMAX);
     OPCODE(ATOMIC_CMP_SWAPW);
+    OPCODE(LRV);
+    OPCODE(STRV);
     OPCODE(PREFETCH);
   }
   return nullptr;
@@ -4535,8 +4695,9 @@ static bool canTreatAsByteVector(EVT VT) {
 // of the input vector and Index is the index (based on type VecVT) that
 // should be extracted.  Return the new extraction if a simplification
 // was possible or if Force is true.
-SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
-                                              SDValue Op, unsigned Index,
+SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
+                                              EVT VecVT, SDValue Op,
+                                              unsigned Index,
                                               DAGCombinerInfo &DCI,
                                               bool Force) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -4639,9 +4800,8 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
 
 // Optimize vector operations in scalar value Op on the basis that Op
 // is truncated to TruncVT.
-SDValue
-SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
-                                              DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
   // of type TruncVT.
@@ -4675,145 +4835,295 @@ SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
   return SDValue();
 }
 
-SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
-                                                 DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineSIGN_EXTEND(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Convert (sext (ashr (shl X, C1), C2)) to
+  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+  // cheap as narrower ones.
   SelectionDAG &DAG = DCI.DAG;
-  unsigned Opcode = N->getOpcode();
-  if (Opcode == ISD::SIGN_EXTEND) {
-    // Convert (sext (ashr (shl X, C1), C2)) to
-    // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
-    // cheap as narrower ones.
-    SDValue N0 = N->getOperand(0);
-    EVT VT = N->getValueType(0);
-    if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
-      auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-      SDValue Inner = N0.getOperand(0);
-      if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
-        if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
-          unsigned Extra = (VT.getSizeInBits() -
-                            N0.getValueType().getSizeInBits());
-          unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
-          unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
-          EVT ShiftVT = N0.getOperand(1).getValueType();
-          SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
-                                    Inner.getOperand(0));
-          SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
-                                    DAG.getConstant(NewShlAmt, SDLoc(Inner),
-                                                    ShiftVT));
-          return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
-                             DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
-        }
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    SDValue Inner = N0.getOperand(0);
+    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
+        unsigned Extra = (VT.getSizeInBits() -
+                          N0.getValueType().getSizeInBits());
+        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+        EVT ShiftVT = N0.getOperand(1).getValueType();
+        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+                                  Inner.getOperand(0));
+        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
+                                                  ShiftVT));
+        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
       }
     }
   }
-  if (Opcode == SystemZISD::MERGE_HIGH ||
-      Opcode == SystemZISD::MERGE_LOW) {
-    SDValue Op0 = N->getOperand(0);
-    SDValue Op1 = N->getOperand(1);
-    if (Op0.getOpcode() == ISD::BITCAST)
-      Op0 = Op0.getOperand(0);
-    if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-        cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
-      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
-      // for v4f32.
-      if (Op1 == N->getOperand(0))
-        return Op1;
-      // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
-      EVT VT = Op1.getValueType();
-      unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
-      if (ElemBytes <= 4) {
-        Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
-                  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
-        EVT InVT = VT.changeVectorElementTypeToInteger();
-        EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
-                                     SystemZ::VectorBytes / ElemBytes / 2);
-        if (VT != InVT) {
-          Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
-          DCI.AddToWorklist(Op1.getNode());
-        }
-        SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
-        DCI.AddToWorklist(Op.getNode());
-        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
      }
    }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineMERGE(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opcode = N->getOpcode();
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() == ISD::BITCAST)
+    Op0 = Op0.getOperand(0);
+  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
+    // for v4f32.
+    if (Op1 == N->getOperand(0))
+      return Op1;
+    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+    EVT VT = Op1.getValueType();
+    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+    if (ElemBytes <= 4) {
+      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+      EVT InVT = VT.changeVectorElementTypeToInteger();
+      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+                                   SystemZ::VectorBytes / ElemBytes / 2);
+      if (VT != InVT) {
+        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+        DCI.AddToWorklist(Op1.getNode());
       }
+      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+      DCI.AddToWorklist(Op.getNode());
+      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
   }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSTORE(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  auto *SN = cast<StoreSDNode>(N);
+  auto &Op1 = N->getOperand(1);
+  EVT MemVT = SN->getMemoryVT();
   // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
   // for the extraction to be done on a vMiN value, so that we can use VSTE.
   // If X has wider elements then convert it to:
   // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
-  if (Opcode == ISD::STORE) {
-    auto *SN = cast<StoreSDNode>(N);
-    EVT MemVT = SN->getMemoryVT();
-    if (MemVT.isInteger()) {
-      SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
-                                             SN->getValue(), DCI);
-      if (Value.getNode()) {
-        DCI.AddToWorklist(Value.getNode());
-
-        // Rewrite the store with the new form of stored value.
-        return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
-                                 SN->getBasePtr(), SN->getMemoryVT(),
-                                 SN->getMemOperand());
-      }
+  if (MemVT.isInteger()) {
+    if (SDValue Value =
+            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
+      DCI.AddToWorklist(Value.getNode());
+
+      // Rewrite the store with the new form of stored value.
+      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+                               SN->getBasePtr(), SN->getMemoryVT(),
+                               SN->getMemOperand());
     }
   }
-  // Try to simplify a vector extraction.
-  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
-    if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
-      SDValue Op0 = N->getOperand(0);
-      EVT VecVT = Op0.getValueType();
-      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
-                            IndexN->getZExtValue(), DCI, false);
+  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+  // See comment in combineBSWAP about volatile accesses.
+  if (!SN->isVolatile() &&
+      Op1.getOpcode() == ISD::BSWAP &&
+      Op1.getNode()->hasOneUse() &&
+      (Op1.getValueType() == MVT::i16 ||
+       Op1.getValueType() == MVT::i32 ||
+       Op1.getValueType() == MVT::i64)) {
+
+      SDValue BSwapOp = Op1.getOperand(0);
+
+      if (BSwapOp.getValueType() == MVT::i16)
+        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
+
+      SDValue Ops[] = {
+        N->getOperand(0), BSwapOp, N->getOperand(2),
+        DAG.getValueType(Op1.getValueType())
+      };
+
+      return
+        DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
+                                Ops, MemVT, SN->getMemOperand());
   }
   return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Try to simplify a vector extraction.
+  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    SDValue Op0 = N->getOperand(0);
+    EVT VecVT = Op0.getValueType();
+    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+                          IndexN->getZExtValue(), DCI, false);
   }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineJOIN_DWORDS(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
   // (join_dwords X, X) == (replicate X)
-  if (Opcode == SystemZISD::JOIN_DWORDS &&
-      N->getOperand(0) == N->getOperand(1))
+  if (N->getOperand(0) == N->getOperand(1))
     return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                        N->getOperand(0));
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineFP_ROUND(
+    SDNode *N, DAGCombinerInfo &DCI) const {
   // (fround (extract_vector_elt X 0))
   // (fround (extract_vector_elt X 1)) ->
   // (extract_vector_elt (VROUND X) 0)
   // (extract_vector_elt (VROUND X) 1)
   //
   // This is a special case since the target doesn't really support v2f32s.
-  if (Opcode == ISD::FP_ROUND) {
-    SDValue Op0 = N->getOperand(0);
-    if (N->getValueType(0) == MVT::f32 &&
-        Op0.hasOneUse() &&
-        Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-        Op0.getOperand(0).getValueType() == MVT::v2f64 &&
-        Op0.getOperand(1).getOpcode() == ISD::Constant &&
-        cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
-      SDValue Vec = Op0.getOperand(0);
-      for (auto *U : Vec->uses()) {
-        if (U != Op0.getNode() &&
-            U->hasOneUse() &&
-            U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-            U->getOperand(0) == Vec &&
-            U->getOperand(1).getOpcode() == ISD::Constant &&
-            cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
-          SDValue OtherRound = SDValue(*U->use_begin(), 0);
-          if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-              OtherRound.getOperand(0) == SDValue(U, 0) &&
-              OtherRound.getValueType() == MVT::f32) {
-            SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-                                         MVT::v4f32, Vec);
-            DCI.AddToWorklist(VRound.getNode());
-            SDValue Extract1 =
-              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
-                          VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
-            DCI.AddToWorklist(Extract1.getNode());
-            DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
-            SDValue Extract0 =
-              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
-                          VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
-            return Extract0;
-          }
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op0 = N->getOperand(0);
+  if (N->getValueType(0) == MVT::f32 &&
+      Op0.hasOneUse() &&
+      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+      Op0.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+    SDValue Vec = Op0.getOperand(0);
+    for (auto *U : Vec->uses()) {
+      if (U != Op0.getNode() &&
+          U->hasOneUse() &&
+          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          U->getOperand(0) == Vec &&
+          U->getOperand(1).getOpcode() == ISD::Constant &&
+          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+        SDValue OtherRound = SDValue(*U->use_begin(), 0);
+        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+            OtherRound.getOperand(0) == SDValue(U, 0) &&
+            OtherRound.getValueType() == MVT::f32) {
+          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                       MVT::v4f32, Vec);
+          DCI.AddToWorklist(VRound.getNode());
+          SDValue Extract1 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+          DCI.AddToWorklist(Extract1.getNode());
+          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          SDValue Extract0 =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          return Extract0;
+        }
+      }
+    }
+  }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineBSWAP(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+  // These loads are allowed to access memory multiple times, and so we must check
+  // that the loads are not volatile before performing the combine.
+  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+      N->getOperand(0).hasOneUse() &&
+      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
+       N->getValueType(0) == MVT::i64) &&
+      !cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
+    SDValue Load = N->getOperand(0);
+    LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+    // Create the byte-swapping load.
+    SDValue Ops[] = {
+      LD->getChain(),    // Chain
+      LD->getBasePtr(),  // Ptr
+      DAG.getValueType(N->getValueType(0)) // VT
+    };
+    SDValue BSLoad =
+      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
+                              DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+                                            MVT::i64 : MVT::i32, MVT::Other),
+                              Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+    // If this is an i16 load, insert the truncate.
+    SDValue ResVal = BSLoad;
+    if (N->getValueType(0) == MVT::i16)
+      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
+
+    // First, combine the bswap away.  This makes the value produced by the
+    // load dead.
+    DCI.CombineTo(N, ResVal);
+
+    // Next, combine the load away, we give it a bogus result value but a real
+    // chain result.  The result value is dead because the bswap is dead.
+    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+    // Return N so it doesn't get rechecked!
+    return SDValue(N, 0);
+  }
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Shift/rotate instructions only use the last 6 bits of the second operand
+  // register. If the second operand is the result of an AND with an immediate
+  // value that has its last 6 bits set, we can safely remove the AND operation.
+
+SDValue SystemZTargetLowering::combineSHIFTROT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Shift/rotate instructions only use the last 6 bits of the second operand
+  // register. If the second operand is the result of an AND with an immediate
+  // value that has its last 6 bits set, we can safely remove the AND operation.
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::AND) {
+    auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+
+    // The AND mask is constant
+    if (AndMask) {
+      auto AmtVal = AndMask->getZExtValue();
+
+      // Bottom 6 bits are set
+      if ((AmtVal & 0x3f) == 0x3f) {
+        SDValue AndOp = N1->getOperand(0);
+
+        // This is the only use, so remove the node
+        if (N1.hasOneUse()) {
+          // Combine the AND away
+          DCI.CombineTo(N1.getNode(), AndOp);
+
+          // Return N so it isn't rechecked
+          return SDValue(N, 0);
+
+        // The node will be reused, so create a new node for this one use
+        } else {
+          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+                                        N->getValueType(0), N->getOperand(0),
+                                        AndOp);
+          DCI.AddToWorklist(Replace.getNode());
+
+          return Replace;
         }
       }
     }
   }
+
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
+  case SystemZISD::MERGE_HIGH:
+  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
+  case ISD::STORE:              return combineSTORE(N, DCI);
+  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
+  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
+  case ISD::BSWAP:              return combineBSWAP(N, DCI);
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
+  }
+  return SDValue();
 }
@@ -4831,7 +5141,7 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
 
 // Split MBB after MI and return the new block (the one that contains
 // instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
+static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
                                           MachineBasicBlock *MBB) {
   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
   NewMBB->splice(NewMBB->begin(), MBB,
@@ -4841,7 +5151,7 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
 
 // Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
                                            MachineBasicBlock *MBB) {
   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
@@ -4850,34 +5160,36 @@ static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
 }
 
 // Force base value Base into a register before MI.  Return the register.
-static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
   if (Base.isReg())
     return Base.getReg();
 
-  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock *MBB = MI.getParent();
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
-    .addOperand(Base).addImm(0).addReg(0);
+  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
+      .addOperand(Base)
+      .addImm(0)
+      .addReg(0);
   return Reg;
 }
 
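As a preface to the pseudo expansions below, a stand-alone sketch (not part of this patch; the function name is hypothetical) of source that typically becomes a Select32 pseudo, which emitSelect then expands into a compare-branch diamond joined by a PHI:

#include <cstdint>

// Hypothetical example: without a usable conditional-move instruction,
// this select is custom-inserted as StartMBB -> (FalseMBB) -> JoinMBB,
// where JoinMBB merges the two incoming values with a PHI node.
int32_t sel(bool Cond, int32_t A, int32_t B) {
  return Cond ? A : B;
}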
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 MachineBasicBlock *
-SystemZTargetLowering::emitSelect(MachineInstr *MI,
+SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
-  unsigned DestReg = MI->getOperand(0).getReg();
-  unsigned TrueReg = MI->getOperand(1).getReg();
-  unsigned FalseReg = MI->getOperand(2).getReg();
-  unsigned CCValid = MI->getOperand(3).getImm();
-  unsigned CCMask = MI->getOperand(4).getImm();
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned TrueReg = MI.getOperand(1).getReg();
+  unsigned FalseReg = MI.getOperand(2).getReg();
+  unsigned CCValid = MI.getOperand(3).getImm();
+  unsigned CCMask = MI.getOperand(4).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   MachineBasicBlock *StartMBB = MBB;
   MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
@@ -4905,7 +5217,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
     .addReg(TrueReg).addMBB(StartMBB)
     .addReg(FalseReg).addMBB(FalseMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return JoinMBB;
 }
 
@@ -4913,21 +5225,21 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
 // StoreOpcode is the store to use and Invert says whether the store should
 // happen when the condition is false rather than true.  If a STORE ON
 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
-MachineBasicBlock *
-SystemZTargetLowering::emitCondStore(MachineInstr *MI,
-                                     MachineBasicBlock *MBB,
-                                     unsigned StoreOpcode, unsigned STOCOpcode,
-                                     bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
+                                                        MachineBasicBlock *MBB,
+                                                        unsigned StoreOpcode,
+                                                        unsigned STOCOpcode,
+                                                        bool Invert) const {
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
-  unsigned SrcReg = MI->getOperand(0).getReg();
-  MachineOperand Base = MI->getOperand(1);
-  int64_t Disp = MI->getOperand(2).getImm();
-  unsigned IndexReg = MI->getOperand(3).getReg();
-  unsigned CCValid = MI->getOperand(4).getImm();
-  unsigned CCMask = MI->getOperand(5).getImm();
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned SrcReg = MI.getOperand(0).getReg();
+  MachineOperand Base = MI.getOperand(1);
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned IndexReg = MI.getOperand(3).getReg();
+  unsigned CCValid = MI.getOperand(4).getImm();
+  unsigned CCMask = MI.getOperand(5).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
 
@@ -4940,7 +5252,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
       .addReg(SrcReg).addOperand(Base).addImm(Disp)
       .addImm(CCValid).addImm(CCMask);
-    MI->eraseFromParent();
+    MI.eraseFromParent();
     return MBB;
   }
 
@@ -4969,7 +5281,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
     .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
   MBB->addSuccessor(JoinMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return JoinMBB;
 }
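For context, a stand-alone sketch (not part of this patch; names are hypothetical) of a sub-word atomic operation; such operations have no single machine instruction on this target, so they are matched to ATOMIC_LOADW_* pseudos and expanded by the code below into a compare-and-swap loop over the containing aligned 32-bit word:

#include <atomic>
#include <cstdint>

// Hypothetical example: the 8-bit OR is performed on the surrounding
// 32-bit word, using shifted masks and a compare-and-swap (CS) loop.
uint8_t set_flag(std::atomic<uint8_t> &Flags) {
  return Flags.fetch_or(0x80);
}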
@@ -4980,12 +5292,9 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
 // is one of the operands.  Invert says whether the field should be
 // inverted after performing BinOpcode (e.g. for NAND).
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned BinOpcode,
-                                            unsigned BitSize,
-                                            bool Invert) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
+    unsigned BitSize, bool Invert) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -4994,15 +5303,15 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
 
   // Extract the operands.  Base can be a register or a frame index.
   // Src2 can be a register or immediate.
-  unsigned Dest = MI->getOperand(0).getReg();
-  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
-  int64_t Disp = MI->getOperand(2).getImm();
-  MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
-  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
+  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
-    BitSize = MI->getOperand(6).getImm();
+    BitSize = MI.getOperand(6).getImm();
 
   // Subword operations use 32-bit registers.
   const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5090,7 +5399,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
@@ -5100,12 +5409,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
 // for when the current field should be kept.  BitSize is the width of
 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
-MachineBasicBlock *
-SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned CompareOpcode,
-                                            unsigned KeepOldMask,
-                                            unsigned BitSize) const {
+MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
+    unsigned KeepOldMask, unsigned BitSize) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -5113,15 +5419,15 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
   bool IsSubWord = (BitSize < 32);
 
   // Extract the operands.  Base can be a register or a frame index.
-  unsigned Dest = MI->getOperand(0).getReg();
-  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
-  int64_t Disp = MI->getOperand(2).getImm();
-  unsigned Src2 = MI->getOperand(3).getReg();
-  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned Src2 = MI.getOperand(3).getReg();
+  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
-    BitSize = MI->getOperand(6).getImm();
+    BitSize = MI.getOperand(6).getImm();
 
   // Subword operations use 32-bit registers.
   const TargetRegisterClass *RC = (BitSize <= 32 ?
@@ -5209,30 +5515,31 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
 // instruction MI.
 MachineBasicBlock *
-SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const {
+  MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Extract the operands.  Base can be a register or a frame index.
-  unsigned Dest = MI->getOperand(0).getReg();
-  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
-  int64_t Disp = MI->getOperand(2).getImm();
-  unsigned OrigCmpVal = MI->getOperand(3).getReg();
-  unsigned OrigSwapVal = MI->getOperand(4).getReg();
-  unsigned BitShift = MI->getOperand(5).getReg();
-  unsigned NegBitShift = MI->getOperand(6).getReg();
-  int64_t BitSize = MI->getOperand(7).getImm();
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned Dest = MI.getOperand(0).getReg();
+  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
+  int64_t Disp = MI.getOperand(2).getImm();
+  unsigned OrigCmpVal = MI.getOperand(3).getReg();
+  unsigned OrigSwapVal = MI.getOperand(4).getReg();
+  unsigned BitShift = MI.getOperand(5).getReg();
+  unsigned NegBitShift = MI.getOperand(6).getReg();
+  int64_t BitSize = MI.getOperand(7).getImm();
+  DebugLoc DL = MI.getDebugLoc();
 
   const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
 
@@ -5323,7 +5630,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
   MBB->addSuccessor(LoopMBB);
   MBB->addSuccessor(DoneMBB);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
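Similarly, a stand-alone sketch (not part of this patch; names are hypothetical) of source that reaches the ATOMIC_CMP_SWAPW expansion above, where a sub-word compare-and-swap is widened to the containing 32-bit word and retried in the emitted CS loop:

#include <atomic>
#include <cstdint>

// Hypothetical example: a 16-bit compare_exchange_strong has no direct
// machine form, so it goes through the ATOMIC_CMP_SWAPW pseudo.
bool try_claim(std::atomic<uint16_t> &Slot, uint16_t Mine) {
  uint16_t Free = 0;
  return Slot.compare_exchange_strong(Free, Mine);
}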
@@ -5331,18 +5638,18 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
 // if the high register of the GR128 value must be cleared or false if
 // it's "don't care".  SubReg is subreg_l32 when extending a GR32
 // and subreg_l64 when extending a GR64.
-MachineBasicBlock *
-SystemZTargetLowering::emitExt128(MachineInstr *MI,
-                                  MachineBasicBlock *MBB,
-                                  bool ClearEven, unsigned SubReg) const {
+MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
+                                                     MachineBasicBlock *MBB,
+                                                     bool ClearEven,
+                                                     unsigned SubReg) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  unsigned Dest = MI->getOperand(0).getReg();
-  unsigned Src = MI->getOperand(1).getReg();
+  unsigned Dest = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
   unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
 
   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
@@ -5359,25 +5666,23 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
     .addReg(In128).addReg(Src).addImm(SubReg);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }
 
-MachineBasicBlock *
-SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
-                                         MachineBasicBlock *MBB,
-                                         unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
-  uint64_t DestDisp = MI->getOperand(1).getImm();
-  MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
-  uint64_t SrcDisp = MI->getOperand(3).getImm();
-  uint64_t Length = MI->getOperand(4).getImm();
+  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
+  uint64_t DestDisp = MI.getOperand(1).getImm();
+  MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
+  uint64_t SrcDisp = MI.getOperand(3).getImm();
+  uint64_t Length = MI.getOperand(4).getImm();
 
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
@@ -5385,10 +5690,10 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
                             splitBlockAfter(MI, MBB) : nullptr);
 
   // Check for the loop form, in which operand 5 is the trip count.
-  if (MI->getNumExplicitOperands() > 5) {
+  if (MI.getNumExplicitOperands() > 5) {
     bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
 
-    uint64_t StartCountReg = MI->getOperand(5).getReg();
+    uint64_t StartCountReg = MI.getOperand(5).getReg();
     uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
     uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
                              forceReg(MI, DestBase, TII));
@@ -5491,15 +5796,19 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
   // Apply them using LAY if so.
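   // (Illustrative aside, not from the original source: MVC/CLC-style
   // memory instructions encode only an unsigned 12-bit displacement,
   // 0..4095, while LAY accepts a signed 20-bit displacement, so LAY can
   // fold an out-of-range Base+Disp into a plain register for the code
   // below.)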
   if (!isUInt<12>(DestDisp)) {
     unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
-      .addOperand(DestBase).addImm(DestDisp).addReg(0);
+    BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+        .addOperand(DestBase)
+        .addImm(DestDisp)
+        .addReg(0);
     DestBase = MachineOperand::CreateReg(Reg, false);
     DestDisp = 0;
   }
   if (!isUInt<12>(SrcDisp)) {
     unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
-      .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+    BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+        .addOperand(SrcBase)
+        .addImm(SrcDisp)
+        .addReg(0);
     SrcBase = MachineOperand::CreateReg(Reg, false);
     SrcDisp = 0;
   }
@@ -5527,26 +5836,24 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
     MBB->addLiveIn(SystemZ::CC);
   }
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }
 
 // Decompose string pseudo-instruction MI into a loop that continually performs
 // Opcode until CC != 3.
-MachineBasicBlock *
-SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
-                                         MachineBasicBlock *MBB,
-                                         unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  uint64_t End1Reg = MI->getOperand(0).getReg();
-  uint64_t Start1Reg = MI->getOperand(1).getReg();
-  uint64_t Start2Reg = MI->getOperand(2).getReg();
-  uint64_t CharReg = MI->getOperand(3).getReg();
+  uint64_t End1Reg = MI.getOperand(0).getReg();
+  uint64_t Start1Reg = MI.getOperand(1).getReg();
+  uint64_t Start2Reg = MI.getOperand(2).getReg();
+  uint64_t CharReg = MI.getOperand(3).getReg();
 
   const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
   uint64_t This1Reg = MRI.createVirtualRegister(RC);
@@ -5589,26 +5896,24 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
 
   DoneMBB->addLiveIn(SystemZ::CC);
 
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return DoneMBB;
 }
 
 // Update TBEGIN instruction with final opcode and register clobbers.
-MachineBasicBlock *
-SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
-                                            MachineBasicBlock *MBB,
-                                            unsigned Opcode,
-                                            bool NoFloat) const {
+MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
+    bool NoFloat) const {
   MachineFunction &MF = *MBB->getParent();
   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 
   // Update opcode.
-  MI->setDesc(TII->get(Opcode));
+  MI.setDesc(TII->get(Opcode));
 
   // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
   // Make sure to add the corresponding GRSM bits if they are missing.
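   // (Illustrative aside, not from the original source: each bit of the
   // general-register save mask in the TBEGIN immediate covers an
   // even/odd GPR pair, which is why the GPRControlBit table below holds
   // the same value for both registers of each pair.)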
-  uint64_t Control = MI->getOperand(2).getImm();
+  uint64_t Control = MI.getOperand(2).getImm();
   static const unsigned GPRControlBit[16] = {
     0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
     0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
@@ -5616,13 +5921,13 @@
   Control |= GPRControlBit[15];
   if (TFI->hasFP(MF))
     Control |= GPRControlBit[11];
-  MI->getOperand(2).setImm(Control);
+  MI.getOperand(2).setImm(Control);
 
   // Add GPR clobbers.
   for (int I = 0; I < 16; I++) {
     if ((Control & GPRControlBit[I]) == 0) {
       unsigned Reg = SystemZMC::GR64Regs[I];
-      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
     }
   }
 
@@ -5631,12 +5936,12 @@
     if (Subtarget.hasVector()) {
       for (int I = 0; I < 32; I++) {
         unsigned Reg = SystemZMC::VR128Regs[I];
-        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
       }
     } else {
      for (int I = 0; I < 16; I++) {
        unsigned Reg = SystemZMC::FP64Regs[I];
-        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
   }
@@ -5644,17 +5949,15 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
   return MBB;
 }
 
-MachineBasicBlock *
-SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
-                                           MachineBasicBlock *MBB,
-                                           unsigned Opcode) const {
+MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const SystemZInstrInfo *TII =
       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
 
-  unsigned SrcReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(0).getReg();
 
   // Create new virtual register of the same class as source.
   const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
@@ -5664,14 +5967,14 @@ SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
   // well.
   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
     .addReg(SrcReg);
-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }
 
-MachineBasicBlock *SystemZTargetLowering::
-EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
-  switch (MI->getOpcode()) {
+MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
+    MachineInstr &MI, MachineBasicBlock *MBB) const {
+  switch (MI.getOpcode()) {
   case SystemZ::Select32Mux:
   case SystemZ::Select32:
   case SystemZ::SelectF32: