diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/PowerPC/PPCISelDAGToDAG.cpp | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Notes
Diffstat (limited to 'lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2035 |
1 files changed, 1444 insertions, 591 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 901539b682ba..d3a223fe03e0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -36,6 +36,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" @@ -53,8 +55,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -101,6 +101,29 @@ static cl::opt<bool> EnableBranchHint( cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); +enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, + ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, + ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; + +static cl::opt<ICmpInGPRType> CmpInGPR( + "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), + cl::desc("Specify the types of comparisons to emit GPR-only code for."), + cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), + clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), + clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), + clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), + clEnumValN(ICGPR_NonExtIn, "nonextin", + "Only comparisons where inputs don't need [sz]ext."), + clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), + clEnumValN(ICGPR_ZextI32, "zexti32", + "Only i32 comparisons with zext result."), + clEnumValN(ICGPR_ZextI64, "zexti64", + "Only i64 comparisons with zext result."), + clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), + 
clEnumValN(ICGPR_SextI32, "sexti32", + "Only i32 comparisons with sext result."), + clEnumValN(ICGPR_SextI64, "sexti64", + "Only i64 comparisons with sext result."))); namespace { //===--------------------------------------------------------------------===// @@ -133,6 +156,12 @@ namespace { void PreprocessISelDAG() override; void PostprocessISelDAG() override; + /// getI16Imm - Return a target constant with the specified value, of type + /// i16. + inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i16); + } + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { @@ -168,6 +197,7 @@ namespace { bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); + bool tryIntCompareInGPR(SDNode *N); /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. @@ -270,34 +300,7 @@ namespace { #include "PPCGenDAGISel.inc" private: - // Conversion type for interpreting results of a 32-bit instruction as - // a 64-bit value or vice versa. - enum ExtOrTruncConversion { Ext, Trunc }; - - // Modifiers to guide how an ISD::SETCC node's result is to be computed - // in a GPR. 
- // ZExtOrig - use the original condition code, zero-extend value - // ZExtInvert - invert the condition code, zero-extend value - // SExtOrig - use the original condition code, sign-extend value - // SExtInvert - invert the condition code, sign-extend value - enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; - bool trySETCC(SDNode *N); - bool tryEXTEND(SDNode *N); - bool tryLogicOpOfCompares(SDNode *N); - SDValue computeLogicOpInGPR(SDValue LogicOp); - SDValue signExtendInputIfNeeded(SDValue Input); - SDValue zeroExtendInputIfNeeded(SDValue Input); - SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); - SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, - int64_t RHSValue, SDLoc dl); - SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -388,7 +391,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - const Module *M = MF->getFunction()->getParent(); + const Module *M = MF->getFunction().getParent(); DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { @@ -450,6 +453,12 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) { return isInt32Immediate(N.getNode(), Imm); } +/// isInt64Immediate - This method tests to see if the value is a 64-bit +/// constant operand. If so Imm will receive the 64-bit value. 
+static bool isInt64Immediate(SDValue N, uint64_t &Imm) { + return isInt64Immediate(N.getNode(), Imm); +} + static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB) { assert(isa<BasicBlockSDNode>(DestMBB)); @@ -607,8 +616,6 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { unsigned MB, ME; if (isRunOfOnes(InsertMask, MB, ME)) { - SDValue Tmp1, Tmp2; - if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { Op1 = Op1.getOperand(0); @@ -643,8 +650,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { } // Predict the number of instructions that would be generated by calling -// getInt64(N). -static unsigned getInt64CountDirect(int64_t Imm) { +// selectI64Imm(N). +static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. @@ -712,8 +719,8 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { return (Imm << R) | (Imm >> (64 - R)); } -static unsigned getInt64Count(int64_t Imm) { - unsigned Count = getInt64CountDirect(Imm); +static unsigned selectI64ImmInstrCount(int64_t Imm) { + unsigned Count = selectI64ImmInstrCountDirect(Imm); // If the instruction count is 1 or 2, we do not need further analysis // since rotate + load constant requires at least 2 instructions. @@ -722,10 +729,10 @@ static unsigned getInt64Count(int64_t Imm) { for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = getInt64CountDirect(RImm) + 1; + unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; Count = std::min(Count, RCount); - // See comments in getInt64 for an explanation of the logic below. + // See comments in selectI64Imm for an explanation of the logic below. 
unsigned LS = findLastSet(RImm); if (LS != r-1) continue; @@ -733,17 +740,17 @@ static unsigned getInt64Count(int64_t Imm) { uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = getInt64CountDirect(RImmWithOnes) + 1; + RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; Count = std::min(Count, RCount); } return Count; } -// Select a 64-bit constant. For cost-modeling purposes, getInt64Count +// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount // (above) needs to be kept in sync with this function. -static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, - int64_t Imm) { +static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, + int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. @@ -779,8 +786,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, // Simple value. if (isInt<16>(Imm)) { + uint64_t SextImm = SignExtend64(Lo, 16); + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); // Just the Lo bits. - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; @@ -825,13 +834,14 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, return Result; } -static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { - unsigned Count = getInt64CountDirect(Imm); +static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, + int64_t Imm) { + unsigned Count = selectI64ImmInstrCountDirect(Imm); // If the instruction count is 1 or 2, we do not need further analysis // since rotate + load constant requires at least 2 instructions. 
if (Count <= 2) - return getInt64Direct(CurDAG, dl, Imm); + return selectI64ImmDirect(CurDAG, dl, Imm); unsigned RMin = 0; @@ -840,7 +850,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); - unsigned RCount = getInt64CountDirect(RImm) + 1; + unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -863,7 +873,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; - RCount = getInt64CountDirect(RImmWithOnes) + 1; + RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; if (RCount < Count) { Count = RCount; RMin = r; @@ -873,24 +883,86 @@ static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { } if (!RMin) - return getInt64Direct(CurDAG, dl, Imm); + return selectI64ImmDirect(CurDAG, dl, Imm); auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; - SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0); + SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, getI32Imm(64 - RMin), getI32Imm(MaskEnd)); } +static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { + unsigned MaxTruncation = 0; + // Cannot use range-based for loop here as we need the actual use (i.e. we + // need the operand number corresponding to the use). A range-based for + // will unbox the use and provide an SDNode*. + for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); + Use != UseEnd; ++Use) { + unsigned Opc = + Use->isMachineOpcode() ? 
Use->getMachineOpcode() : Use->getOpcode(); + switch (Opc) { + default: return 0; + case ISD::TRUNCATE: + if (Use->isMachineOpcode()) + return 0; + MaxTruncation = + std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); + continue; + case ISD::STORE: { + if (Use->isMachineOpcode()) + return 0; + StoreSDNode *STN = cast<StoreSDNode>(*Use); + unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); + if (MemVTSize == 64 || Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, MemVTSize); + continue; + } + case PPC::STW8: + case PPC::STWX8: + case PPC::STWU8: + case PPC::STWUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 32u); + continue; + case PPC::STH8: + case PPC::STHX8: + case PPC::STHU8: + case PPC::STHUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 16u); + continue; + case PPC::STB8: + case PPC::STBX8: + case PPC::STBU8: + case PPC::STBUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 8u); + continue; + } + } + return MaxTruncation; +} + // Select a 64-bit constant. -static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) { +static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); - return getInt64(CurDAG, dl, Imm); + if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { + uint64_t SextImm = SignExtend64(Imm, MinSize); + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); + if (isInt<16>(SextImm)) + return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); + } + return selectI64Imm(CurDAG, dl, Imm); } namespace { @@ -1090,6 +1162,25 @@ class BitPermutationSelector { return std::make_pair(Interesting = true, &Bits); } + case ISD::ZERO_EXTEND: { + // We support only the case with zero extension from i32 to i64 so far. 
+ if (V.getValueType() != MVT::i64 || + V.getOperand(0).getValueType() != MVT::i32) + break; + + const SmallVector<ValueBit, 64> *LHSBits; + const unsigned NumOperandBits = 32; + std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), + NumOperandBits); + + for (unsigned i = 0; i < NumOperandBits; ++i) + Bits[i] = (*LHSBits)[i]; + + for (unsigned i = NumOperandBits; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + + return std::make_pair(Interesting, &Bits); + } } for (unsigned i = 0; i < NumBits; ++i) @@ -1351,6 +1442,24 @@ class BitPermutationSelector { return ~Mask; } + // This method extends an input value to 64 bit if input is 32-bit integer. + // While selecting instructions in BitPermutationSelector in 64-bit mode, + // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. + // In such case, we extend it to 64 bit to be consistent with other values. + SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { + if (V.getValueSizeInBits() == 64) + return V; + + assert(V.getValueSizeInBits() == 32); + SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, + MVT::i64), 0); + SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, + MVT::i64, ImDef, V, + SubRegIdx), 0); + return ExtVal; + } + // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. 
@@ -1567,27 +1676,30 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), - getI32Imm(InstMaskEnd - 32, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); } if (InstMaskStart == 0) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskEnd, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); } @@ -1628,15 +1740,16 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), - getI32Imm(InstMaskEnd - 32, dl) }; + { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 
+ getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); } @@ -1730,7 +1843,7 @@ class BitPermutationSelector { NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else - NumAndInsts += getInt64Count(Mask) + /* and */ 1; + NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; @@ -1786,10 +1899,14 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - VRot, getI32Imm(ANDIMask, dl)), 0); + ExtendToInt64(VRot, dl), + getI32Imm(ANDIMask, dl)), + 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - VRot, getI32Imm(ANDISMask, dl)), 0); + ExtendToInt64(VRot, dl), + getI32Imm(ANDISMask, dl)), + 0); if (!ANDIVal) TotalVal = ANDISVal; @@ -1797,19 +1914,21 @@ class BitPermutationSelector { TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ANDIVal, ANDISVal), 0); + ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { - TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0); + TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - VRot, TotalVal), 0); + ExtendToInt64(VRot, dl), TotalVal), + 0); } if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - Res, TotalVal), 0); + ExtendToInt64(Res, dl), TotalVal), + 0); // Now, remove all groups with this underlying value and rotation // factor. 
@@ -1929,10 +2048,10 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - Res, getI32Imm(ANDIMask, dl)), 0); + ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - Res, getI32Imm(ANDISMask, dl)), 0); + ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; @@ -1940,14 +2059,14 @@ class BitPermutationSelector { Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ANDIVal, ANDISVal), 0); + ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { - if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1; + if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; - SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0); + SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - Res, MaskVal), 0); + ExtendToInt64(Res, dl), MaskVal), 0); } } @@ -2046,8 +2165,1204 @@ public: } }; +class IntegerCompareEliminator { + SelectionDAG *CurDAG; + PPCDAGToDAGISel *S; + // Conversion type for interpreting results of a 32-bit instruction as + // a 64-bit value or vice versa. + enum ExtOrTruncConversion { Ext, Trunc }; + + // Modifiers to guide how an ISD::SETCC node's result is to be computed + // in a GPR. + // ZExtOrig - use the original condition code, zero-extend value + // ZExtInvert - invert the condition code, zero-extend value + // SExtOrig - use the original condition code, sign-extend value + // SExtInvert - invert the condition code, sign-extend value + enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; + + // Comparisons against zero to emit GPR code sequences for. Each of these + // sequences may need to be emitted for two or more equivalent patterns. + // For example (a >= 0) == (a > -1). 
The direction of the comparison (</>) + // matters as well as the extension type: sext (-1/0), zext (1/0). + // GEZExt - (zext (LHS >= 0)) + // GESExt - (sext (LHS >= 0)) + // LEZExt - (zext (LHS <= 0)) + // LESExt - (sext (LHS <= 0)) + enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; + + SDNode *tryEXTEND(SDNode *N); + SDNode *tryLogicOpOfCompares(SDNode *N); + SDValue computeLogicOpInGPR(SDValue LogicOp); + SDValue signExtendInputIfNeeded(SDValue Input); + SDValue zeroExtendInputIfNeeded(SDValue Input); + SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); + SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, + ZeroCompare CmpTy); + SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); + +public: + IntegerCompareEliminator(SelectionDAG *DAG, + PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { + assert(CurDAG->getTargetLoweringInfo() + .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && + "Only expecting to use this on 64 bit targets."); + } + SDNode *Select(SDNode *N) { + if (CmpInGPR == ICGPR_None) + return nullptr; + switch (N->getOpcode()) { + default: break; + case ISD::ZERO_EXTEND: + if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || + CmpInGPR == ICGPR_SextI64) + return nullptr; + LLVM_FALLTHROUGH; + case ISD::SIGN_EXTEND: + if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || + CmpInGPR == ICGPR_ZextI64) + return nullptr; + return tryEXTEND(N); + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return tryLogicOpOfCompares(N); + } + return nullptr; + } +}; + +static bool 
isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} +// The obvious case for wanting to keep the value in a GPR. Namely, the +// result of the comparison is actually needed in a GPR. +SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { + assert((N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && + "Expecting a zero/sign extend node!"); + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. + if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = computeLogicOpInGPR(N->getOperand(0)); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) + return nullptr; + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + + if (!WideRes) + return nullptr; + + SDLoc dl(N); + bool Input32Bit = WideRes.getValueType() == MVT::i32; + bool Output32Bit = N->getValueType(0) == MVT::i32; + + NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; + NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; + + SDValue ConvOp = WideRes; + if (Input32Bit != Output32Bit) + ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : + ExtOrTruncConversion::Trunc); + return ConvOp.getNode(); +} + +// Attempt to perform logical operations on the results of comparisons while +// keeping the values in GPRs. Without doing so, these would end up being +// lowered to CR-logical operations which suffer from significant latency and +// low ILP. 
+SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { + if (N->getValueType(0) != MVT::i1) + return nullptr; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); + if (!LoweredLogical) + return nullptr; + + SDLoc dl(N); + bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; + unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue LHS = LoweredLogical.getOperand(0); + SDValue RHS = LoweredLogical.getOperand(1); + SDValue WideOp; + SDValue OpToConvToRecForm; + + // Look through any 32-bit to 64-bit implicit extend nodes to find the + // opcode that is input to the XORI. + if (IsBitwiseNegate && + LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) + OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); + else if (IsBitwiseNegate) + // If the input to the XORI isn't an extension, that's what we're after. + OpToConvToRecForm = LoweredLogical.getOperand(0); + else + // If this is not an XORI, it is a reg-reg logical op and we can convert + // it to record-form. + OpToConvToRecForm = LoweredLogical; + + // Get the record-form version of the node we're looking to use to get the + // CR result from. + uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); + int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); + + // Convert the right node to record-form. This is either the logical we're + // looking at or it is the input node to the negation (if we're looking at + // a bitwise negation). + if (NewOpc != -1 && IsBitwiseNegate) { + // The input to the XORI has a record-form. Use it. + assert(LoweredLogical.getConstantOperandVal(1) == 1 && + "Expected a PPC::XORI8 only for bitwise negation."); + // Emit the record-form instruction. 
+ std::vector<SDValue> Ops; + for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) + Ops.push_back(OpToConvToRecForm.getOperand(i)); + + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, + OpToConvToRecForm.getValueType(), + MVT::Glue, Ops), 0); + } else { + assert((NewOpc != -1 || !IsBitwiseNegate) && + "No record form available for AND8/OR8/XOR8?"); + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, + MVT::i64, MVT::Glue, LHS, RHS), 0); + } + + // Select this node to a single bit from CR0 set by the record-form node + // just created. For bitwise negation, use the EQ bit which is the equivalent + // of negating the result (i.e. it is a bit set when the result of the + // operation is zero). + SDValue SRIdxVal = + CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); + SDValue CRBit = + SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + return CRBit.getNode(); +} + +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). +SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue LHS, RHS; + + // Special case: xor %a, -1 + bool IsBitwiseNegation = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. 
+ // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + S->getI64Imm(0, dl), + S->getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return computeLogicOpInGPR(Operand); + return SDValue(); + }; + LHS = getLogicOperand(LogicOp.getOperand(0)); + RHS = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!LHS || (!RHS && !IsBitwiseNegation)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values. + if (LHS.getValueType() == MVT::i32) + LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); + if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) + RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = PPC::AND8; break; + case ISD::OR: NewOpc = PPC::OR8; break; + case ISD::XOR: NewOpc = PPC::XOR8; break; + } + + if (IsBitwiseNegation) { + RHS = S->getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); + +} + +/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. +/// Otherwise just reinterpret it as a 64-bit value. 
+/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). +SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only sign-extend 32-bit values here."); + unsigned Opc = Input.getOpcode(); + + // The value was sign extended and then truncated to 32-bits. No need to + // sign extend it again. + if (Opc == ISD::TRUNCATE && + (Input.getOperand(0).getOpcode() == ISD::AssertSext || + Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); + // The input is a sign-extending load. All ppc sign-extending loads + // sign-extend to the full 64-bits. + if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); + // We don't sign-extend constants. + if (InputConst) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + SDLoc dl(Input); + SignExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, + MVT::i64, Input), 0); +} + +/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. +/// Otherwise just reinterpret it as a 64-bit value. +/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). 
+SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only zero-extend 32-bit values here."); + unsigned Opc = Input.getOpcode(); + + // The only condition under which we can omit the actual extend instruction: + // - The value is a positive constant + // - The value comes from a load that isn't a sign-extending load + // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. + bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && + (Input.getOperand(0).getOpcode() == ISD::AssertZext || + Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); + if (IsTruncateOfZExt) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); + if (InputConst && InputConst->getSExtValue() >= 0) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); + // The input is a load that doesn't sign-extend (it will be zero-extended). + if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + // None of the above, need to zero-extend. + SDLoc dl(Input); + ZeroExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, + S->getI64Imm(0, dl), + S->getI64Imm(32, dl)), 0); +} + +// Handle a 32-bit value in a 64-bit register and vice-versa. These are of +// course not actual zero/sign extensions that will generate machine code, +// they're just a way to reinterpret a 32 bit value in a register as a +// 64 bit value and vice-versa. 
+SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, + ExtOrTruncConversion Conv) { + SDLoc dl(NatWidthRes); + + // For reinterpreting 32-bit values as 64 bit values, we generate + // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> + if (Conv == ExtOrTruncConversion::Ext) { + SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, + ImDef, NatWidthRes, SubRegIdx), 0); + } + + assert(Conv == ExtOrTruncConversion::Trunc && + "Unknown convertion between 32 and 64 bit values."); + // For reinterpreting 64-bit values as 32-bit values, we just need to + // EXTRACT_SUBREG (i.e. extract the low word). + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, + NatWidthRes, SubRegIdx), 0); +} + +// Produce a GPR sequence for compound comparisons (<=, >=) against zero. +// Handle both zero-extensions and sign-extensions. +SDValue +IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, + ZeroCompare CmpTy) { + EVT InVT = LHS.getValueType(); + bool Is32Bit = InVT == MVT::i32; + SDValue ToExtend; + + // Produce the value that needs to be either zero or sign extended. + switch (CmpTy) { + case ZeroCompare::GEZExt: + case ZeroCompare::GESExt: + ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, + dl, InVT, LHS, LHS), 0); + break; + case ZeroCompare::LEZExt: + case ZeroCompare::LESExt: { + if (Is32Bit) { + // Upper 32 bits cannot be undefined for this sequence. 
+ LHS = signExtendInputIfNeeded(LHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + ToExtend = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Neg, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } else { + SDValue Addi = + SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, + S->getI64Imm(~0ULL, dl)), 0); + ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, + Addi, LHS), 0); + } + break; + } + } + + // For 64-bit sequences, the extensions are the same for the GE/LE cases. + if (!Is32Bit && + (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + ToExtend, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + if (!Is32Bit && + (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, + S->getI64Imm(63, dl)), 0); + + assert(Is32Bit && "Should have handled the 32-bit sequences above."); + // For 32-bit sequences, the extensions differ between GE/LE cases. + switch (CmpTy) { + case ZeroCompare::GEZExt: { + SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + case ZeroCompare::GESExt: + return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, + S->getI32Imm(31, dl)), 0); + case ZeroCompare::LEZExt: + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, + S->getI32Imm(1, dl)), 0); + case ZeroCompare::LESExt: + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, + S->getI32Imm(-1, dl)), 0); + } + + // The above case covers all the enumerators so it can't have a default clause + // to avoid compiler warnings. 
+ llvm_unreachable("Unknown zero-comparison type."); +} + +/// Produces a zero-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || + CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) + // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), + S->getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + S->getI32Imm(1, dl)), 0); + } + case ISD::SETGE: { + // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) + // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) + if(IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + + // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) + // by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) + // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) + if(IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + } + + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Sub = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, + S->getI64Imm(1, dl), S->getI64Imm(63, dl)), + 0); + return + SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, + MVT::i64, Shift, S->getI32Imm(1, dl)), 0); + } + case ISD::SETGT: { + // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) + // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) + // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) + // Handle SETLT -1 (which is equivalent to SETGE 0). + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + + if (IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); + } + // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as + // (%b < %a) by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) + // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) + // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) + // Handle SETLT 1 (which is equivalent to SETLE 0). 
+ if (IsRHSOne) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + } + + if (IsRHSZero) { + SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SUBFNode, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + case ISD::SETUGE: + // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) + // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue SrdiNode = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Subtract, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, + S->getI32Imm(1, dl)), 0); + } + case ISD::SETUGT: + // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) + // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Subtract, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + } +} + +/// Produces a sign-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || + CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (sext (setcc %a, %b, seteq)) -> + // (ashr (shl (ctlz (xor %a, %b)), 58), 63) + // (sext (setcc %a, 0, seteq)) -> + // (ashr (shl (ctlz %a), 58), 63) + SDValue CountInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Cntlzw = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); + SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), + S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; + SDValue Slwi = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); + } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. + // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + S->getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } + case ISD::SETGE: { + // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) + // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + + // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) + // by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) + // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 0); + SDValue Srdi = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SUBFNode, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, + S->getI32Imm(-1, dl)), 0); + } + case ISD::SETGT: { + // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) + // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) + // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + if (IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, + S->getI64Imm(63, dl)), 0); + } + // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as + // (%b < %a) by swapping inputs and falling through. 
+ std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) + // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) + // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) + if (IsRHSOne) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + } + if (IsRHSZero) + return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, + S->getI32Imm(31, dl)), 0); + + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + SUBFNode, S->getI64Imm(63, dl)), 0); + } + case ISD::SETUGE: + // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) + // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, + S->getI32Imm(1, dl), S->getI32Imm(63,dl)), + 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, + S->getI32Imm(-1, dl)), 0); + } + case ISD::SETUGT: + // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) + // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + Subtract, S->getI64Imm(63, dl)), 0); + } + } +} + +/// Produces a zero-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || + CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) + // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, + S->getI64Imm(58, dl), + S->getI64Imm(63, dl)), 0); + } + case ISD::SETNE: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue AC = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + Xor, S->getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, + Xor, AC.getValue(1)), 0); + } + case ISD::SETGE: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setge)) -> + // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) + // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setge)) -> + // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) + // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + SDValue ShiftL = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue ShiftR = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, + S->getI64Imm(63, dl)), 0); + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, 
MVT::Glue, + LHS, RHS), 1); + return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, + ShiftR, ShiftL, SubtractCarry), 0); + } + case ISD::SETGT: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setgt)) -> + // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) + // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + if (IsRHSZero) { + SDValue Addi = + SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, + S->getI64Imm(~0ULL, dl)), 0); + SDValue Nor = + SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setlt)) -> + // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) + // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) + if (IsRHSOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + if (IsRHSZero) + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SRADINode = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + LHS, S->getI64Imm(63, dl)), 0); + SDValue SRDINode = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + RHS, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SUBFC8Carry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + SDValue ADDE8Node = + SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, + SRDINode, SRADINode, SUBFC8Carry), 0); + return 
SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, + ADDE8Node, S->getI64Imm(1, dl)), 0); + } + case ISD::SETUGE: + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) + SDValue SUBFC8Carry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 1); + SDValue SUBFE8Node = + SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, + LHS, LHS, SUBFC8Carry), 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, + SUBFE8Node, S->getI64Imm(1, dl)), 0); + } + case ISD::SETUGT: + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + SDValue ExtSub = + SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, + LHS, LHS, SubtractCarry), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, + ExtSub), 0); + } + } +} + +/// Produces a sign-extended result of comparing two 64-bit values according to +/// the passed condition code. 
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Bail out (empty SDValue) when the ppc-gpr-icmps setting excludes 64-bit
  // and/or sign-extended comparisons.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  // RHSValue selects the shorter special-case sequences below.
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    // Result 1 of the ADDIC8 node is the glue that feeds the carry to SUBFE8.
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    // Handle (%a >= %b) as (%b <= %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Result 1 of SUBFC8 is the glue that carries CA into the ADDE8.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    // SETGT -1 is handled as SETGE 0.
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    // SETLT 1 is handled as SETLE 0.
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    // NOR of a value with itself is its bitwise complement.
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubCarry), 0);
  }
  }
}

/// Do all uses of this SDValue need the result in a GPR?
/// This is meant to be used on values that have type i1 since
/// it is somewhat meaningless to ask if values of other types
/// should be kept in GPR's.
/// The \p CurDAG parameter is not used by the body.
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  assert(Compare.getOpcode() == ISD::SETCC &&
         "An ISD::SETCC node required here.");

  // For values that have a single use, the caller should obviously already have
  // checked if that use is an extending use. We check the other uses here.
  if (Compare.hasOneUse())
    return true;
  // We want the value in a GPR if it is being extended, used for a select, or
  // used in logical operations.
  for (auto CompareUse : Compare.getNode()->uses())
    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
        CompareUse->getOpcode() != ISD::SELECT &&
        !isLogicOp(CompareUse->getOpcode())) {
      // Any other kind of use disqualifies the compare; count the omission.
      OmittedForNonExtendUses++;
      return false;
    }
  return true;
}

/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// values is a power of two while the other is zero.
+SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, + SetccInGPROpts ConvOpts) { + assert((Compare.getOpcode() == ISD::SETCC || + Compare.getOpcode() == ISD::SELECT_CC) && + "An ISD::SETCC node required here."); + + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) + return SDValue(); + + SDValue LHS = Compare.getOperand(0); + SDValue RHS = Compare.getOperand(1); + + // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. + int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; + ISD::CondCode CC = + cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); + EVT InputVT = LHS.getValueType(); + if (InputVT != MVT::i32 && InputVT != MVT::i64) + return SDValue(); + + if (ConvOpts == SetccInGPROpts::ZExtInvert || + ConvOpts == SetccInGPROpts::SExtInvert) + CC = ISD::getSetCCInverse(CC, true); + + bool Inputs32Bit = InputVT == MVT::i32; + + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; + bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || + ConvOpts == SetccInGPROpts::SExtInvert; + + if (IsSext && Inputs32Bit) + return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); +} + } // end anonymous namespace +bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return false; + + // This optimization will emit code that assumes 64-bit registers + // so we don't want to run it in 32-bit mode. Also don't run it + // on functions that are not to be optimized. 
+ if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + + switch (N->getOpcode()) { + default: break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + IntegerCompareEliminator ICmpElim(CurDAG, this); + if (SDNode *New = ICmpElim.Select(N)) { + ReplaceNode(N, New); + return true; + } + } + } + return false; +} + bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) @@ -2504,506 +3819,6 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } -// Is this opcode a bitwise logical operation? -static bool isLogicOp(unsigned Opc) { - return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; -} - -/// If this node is a sign/zero extension of an integer comparison, -/// it can usually be computed in GPR's rather than using comparison -/// instructions and ISEL. We only do this on 64-bit targets for now -/// as the code is specialized for 64-bit (it uses 64-bit instructions -/// and assumes 64-bit registers). -bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { - if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) - return false; - assert((N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::SIGN_EXTEND) && - "Expecting a zero/sign extend node!"); - - SDValue WideRes; - // If we are zero-extending the result of a logical operation on i1 - // values, we can keep the values in GPRs. - if (isLogicOp(N->getOperand(0).getOpcode()) && - N->getOperand(0).getValueType() == MVT::i1 && - N->getOpcode() == ISD::ZERO_EXTEND) - WideRes = computeLogicOpInGPR(N->getOperand(0)); - else if (N->getOperand(0).getOpcode() != ISD::SETCC) - return false; - else - WideRes = - getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND ? 
- SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); - - if (!WideRes) - return false; - - SDLoc dl(N); - bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; - bool Output32Bit = N->getValueType(0) == MVT::i32; - - NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; - NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; - - SDValue ConvOp = WideRes; - if (Inputs32Bit != Output32Bit) - ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext : - ExtOrTruncConversion::Trunc); - ReplaceNode(N, ConvOp.getNode()); - - return true; -} - -// Lower a logical operation on i1 values into a GPR sequence if possible. -// The result can be kept in a GPR if requested. -// Three types of inputs can be handled: -// - SETCC -// - TRUNCATE -// - Logical operation (AND/OR/XOR) -// There is also a special case that is handled (namely a complement operation -// achieved with xor %a, -1). -SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { - assert(isLogicOp(LogicOp.getOpcode()) && - "Can only handle logic operations here."); - assert(LogicOp.getValueType() == MVT::i1 && - "Can only handle logic operations on i1 values here."); - SDLoc dl(LogicOp); - SDValue LHS, RHS; - - // Special case: xor %a, -1 - bool IsBitwiseNegation = isBitwiseNot(LogicOp); - - // Produces a GPR sequence for each operand of the binary logic operation. - // For SETCC, it produces the respective comparison, for TRUNCATE it truncates - // the value in a GPR and for logic operations, it will recursively produce - // a GPR sequence for the operation. - auto getLogicOperand = [&] (SDValue Operand) -> SDValue { - unsigned OperandOpcode = Operand.getOpcode(); - if (OperandOpcode == ISD::SETCC) - return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); - else if (OperandOpcode == ISD::TRUNCATE) { - SDValue InputOp = Operand.getOperand(0); - EVT InVT = InputOp.getValueType(); - return - SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? 
PPC::RLDICL_32 : - PPC::RLDICL, dl, InVT, InputOp, - getI64Imm(0, dl), getI64Imm(63, dl)), 0); - } else if (isLogicOp(OperandOpcode)) - return computeLogicOpInGPR(Operand); - return SDValue(); - }; - LHS = getLogicOperand(LogicOp.getOperand(0)); - RHS = getLogicOperand(LogicOp.getOperand(1)); - - // If a GPR sequence can't be produced for the LHS we can't proceed. - // Not producing a GPR sequence for the RHS is only a problem if this isn't - // a bitwise negation operation. - if (!LHS || (!RHS && !IsBitwiseNegation)) - return SDValue(); - - NumLogicOpsOnComparison++; - - // We will use the inputs as 64-bit values. - if (LHS.getValueType() == MVT::i32) - LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); - if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) - RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); - - unsigned NewOpc; - switch (LogicOp.getOpcode()) { - default: llvm_unreachable("Unknown logic operation."); - case ISD::AND: NewOpc = PPC::AND8; break; - case ISD::OR: NewOpc = PPC::OR8; break; - case ISD::XOR: NewOpc = PPC::XOR8; break; - } - - if (IsBitwiseNegation) { - RHS = getI64Imm(1, dl); - NewOpc = PPC::XORI8; - } - - return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); - -} - -/// Try performing logical operations on results of comparisons in GPRs. -/// It is typically preferred from a performance perspective over performing -/// the operations on individual bits in the CR. We only do this on 64-bit -/// targets for now as the code is specialized for 64-bit (it uses 64-bit -/// instructions and assumes 64-bit registers). 
-bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { - if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) - return false; - if (N->getValueType(0) != MVT::i1) - return false; - assert(isLogicOp(N->getOpcode()) && - "Expected a logic operation on setcc results."); - SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); - if (!LoweredLogical) - return false; - - SDLoc dl(N); - bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; - unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; - SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); - SDValue LHS = LoweredLogical.getOperand(0); - SDValue RHS = LoweredLogical.getOperand(1); - SDValue WideOp; - SDValue OpToConvToRecForm; - - // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode - // that is input to the XORI. - if (IsBitwiseNegate && - LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) - OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); - else if (IsBitwiseNegate) - // If the input to the XORI isn't an extension, that's what we're after. - OpToConvToRecForm = LoweredLogical.getOperand(0); - else - // If this is not an XORI, it is a reg-reg logical op and we can convert it - // to record-form. - OpToConvToRecForm = LoweredLogical; - - // Get the record-form version of the node we're looking to use to get the - // CR result from. - uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); - int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); - - // Convert the right node to record-form. This is either the logical we're - // looking at or it is the input node to the negation (if we're looking at - // a bitwise negation). - if (NewOpc != -1 && IsBitwiseNegate) { - // The input to the XORI has a record-form. Use it. - assert(LoweredLogical.getConstantOperandVal(1) == 1 && - "Expected a PPC::XORI8 only for bitwise negation."); - // Emit the record-form instruction. 
- std::vector<SDValue> Ops; - for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) - Ops.push_back(OpToConvToRecForm.getOperand(i)); - - WideOp = - SDValue(CurDAG->getMachineNode(NewOpc, dl, - OpToConvToRecForm.getValueType(), - MVT::Glue, Ops), 0); - } else { - assert((NewOpc != -1 || !IsBitwiseNegate) && - "No record form available for AND8/OR8/XOR8?"); - WideOp = - SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, - MVT::i64, MVT::Glue, LHS, RHS), 0); - } - - // Select this node to a single bit from CR0 set by the record-form node - // just created. For bitwise negation, use the EQ bit which is the equivalent - // of negating the result (i.e. it is a bit set when the result of the - // operation is zero). - SDValue SRIdxVal = - CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); - SDValue CRBit = - SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, - MVT::i1, CR0Reg, SRIdxVal, - WideOp.getValue(1)), 0); - ReplaceNode(N, CRBit.getNode()); - return true; -} - -/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. -/// Useful when emitting comparison code for 32-bit values without using -/// the compare instruction (which only considers the lower 32-bits). -SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) { - assert(Input.getValueType() == MVT::i32 && - "Can only sign-extend 32-bit values here."); - unsigned Opc = Input.getOpcode(); - - // The value was sign extended and then truncated to 32-bits. No need to - // sign extend it again. - if (Opc == ISD::TRUNCATE && - (Input.getOperand(0).getOpcode() == ISD::AssertSext || - Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) - return Input; - - LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); - // The input is a sign-extending load. No reason to sign-extend. 
- if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) - return Input; - - ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); - // We don't sign-extend constants and already sign-extended values. - if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG || - Opc == ISD::SIGN_EXTEND) - return Input; - - SDLoc dl(Input); - SignExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0); -} - -/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. -/// Useful when emitting comparison code for 32-bit values without using -/// the compare instruction (which only considers the lower 32-bits). -SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) { - assert(Input.getValueType() == MVT::i32 && - "Can only zero-extend 32-bit values here."); - LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); - unsigned Opc = Input.getOpcode(); - - // No need to zero-extend loaded values (unless they're loaded with - // a sign-extending load). - if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) - return Input; - - ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); - bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0; - // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have - // to conservatively actually clear the high bits. We also don't need to - // zero-extend constants or values that are already zero-extended. - if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND) - return Input; - - SDLoc dl(Input); - ZeroExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input, - getI64Imm(0, dl), getI64Imm(32, dl)), - 0); -} - -// Handle a 32-bit value in a 64-bit register and vice-versa. 
These are of -// course not actual zero/sign extensions that will generate machine code, -// they're just a way to reinterpret a 32 bit value in a register as a -// 64 bit value and vice-versa. -SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes, - ExtOrTruncConversion Conv) { - SDLoc dl(NatWidthRes); - - // For reinterpreting 32-bit values as 64 bit values, we generate - // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> - if (Conv == ExtOrTruncConversion::Ext) { - SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); - SDValue SubRegIdx = - CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, - ImDef, NatWidthRes, SubRegIdx), 0); - } - - assert(Conv == ExtOrTruncConversion::Trunc && - "Unknown convertion between 32 and 64 bit values."); - // For reinterpreting 64-bit values as 32-bit values, we just need to - // EXTRACT_SUBREG (i.e. extract the low word). - SDValue SubRegIdx = - CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); - return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, - NatWidthRes, SubRegIdx), 0); -} - -/// Produces a zero-extended result of comparing two 32-bit values according to -/// the passed condition code. -SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - bool IsRHSZero = RHSValue == 0; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) - // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) - SDValue Xor = IsRHSZero ? 
LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), - getI32Imm(31, dl) }; - return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, - ShiftOps), 0); - } - case ISD::SETNE: { - // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) - // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), - getI32Imm(31, dl) }; - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); - return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, - getI32Imm(1, dl)), 0); - } - } -} - -/// Produces a sign-extended result of comparing two 32-bit values according to -/// the passed condition code. -SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - bool IsRHSZero = RHSValue == 0; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (sext (setcc %a, %b, seteq)) -> - // (ashr (shl (ctlz (xor %a, %b)), 58), 63) - // (sext (setcc %a, 0, seteq)) -> - // (ashr (shl (ctlz %a), 58), 63) - SDValue CountInput = IsRHSZero ? 
LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Cntlzw = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); - SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) }; - SDValue Sldi = - SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, - getI32Imm(63, dl)), 0); - } - case ISD::SETNE: { - // Bitwise xor the operands, count leading zeros, shift right by 5 bits and - // flip the bit, finally take 2's complement. - // (sext (setcc %a, %b, setne)) -> - // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) - // Same as above, but the first xor is not needed. - // (sext (setcc %a, 0, setne)) -> - // (neg (xor (lshr (ctlz %a), 5), 1)) - SDValue Xor = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); - SDValue ShiftOps[] = - { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; - SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); - SDValue Xori = - SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, - getI32Imm(1, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); - } - } -} - -/// Produces a zero-extended result of comparing two 64-bit values according to -/// the passed condition code. -SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - bool IsRHSZero = RHSValue == 0; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) - // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) - SDValue Xor = IsRHSZero ? 
LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue Clz = - SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); - return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, - getI64Imm(58, dl), getI64Imm(63, dl)), - 0); - } - } -} - -/// Produces a sign-extended result of comparing two 64-bit values according to -/// the passed condition code. -SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, - ISD::CondCode CC, - int64_t RHSValue, SDLoc dl) { - bool IsRHSZero = RHSValue == 0; - switch (CC) { - default: return SDValue(); - case ISD::SETEQ: { - // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) - // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) - // {addcz.reg, addcz.CA} = (addcarry %a, -1) - // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) - SDValue AddInput = IsRHSZero ? LHS : - SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); - SDValue Addic = - SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, - AddInput, getI32Imm(~0U, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, - Addic, Addic.getValue(1)), 0); - } - } -} - -/// Does this SDValue have any uses for which keeping the value in a GPR is -/// appropriate. This is meant to be used on values that have type i1 since -/// it is somewhat meaningless to ask if values of other types can be kept in -/// GPR's. -static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { - assert(Compare.getOpcode() == ISD::SETCC && - "An ISD::SETCC node required here."); - - // For values that have a single use, the caller should obviously already have - // checked if that use is an extending use. We check the other uses here. - if (Compare.hasOneUse()) - return true; - // We want the value in a GPR if it is being extended, used for a select, or - // used in logical operations. 
- for (auto CompareUse : Compare.getNode()->uses()) - if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && - CompareUse->getOpcode() != ISD::ZERO_EXTEND && - CompareUse->getOpcode() != ISD::SELECT && - !isLogicOp(CompareUse->getOpcode())) { - OmittedForNonExtendUses++; - return false; - } - return true; -} - -/// Returns an equivalent of a SETCC node but with the result the same width as -/// the inputs. This can nalso be used for SELECT_CC if either the true or false -/// values is a power of two while the other is zero. -SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, - SetccInGPROpts ConvOpts) { - assert((Compare.getOpcode() == ISD::SETCC || - Compare.getOpcode() == ISD::SELECT_CC) && - "An ISD::SETCC node required here."); - - // Don't convert this comparison to a GPR sequence because there are uses - // of the i1 result (i.e. uses that require the result in the CR). - if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) - return SDValue(); - - SDValue LHS = Compare.getOperand(0); - SDValue RHS = Compare.getOperand(1); - - // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. - int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; - ISD::CondCode CC = - cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); - EVT InputVT = LHS.getValueType(); - if (InputVT != MVT::i32 && InputVT != MVT::i64) - return SDValue(); - - if (ConvOpts == SetccInGPROpts::ZExtInvert || - ConvOpts == SetccInGPROpts::SExtInvert) - CC = ISD::getSetCCInverse(CC, true); - - bool Inputs32Bit = InputVT == MVT::i32; - if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - } - - SDLoc dl(Compare); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - int64_t RHSValue = RHSConst ? 
RHSConst->getSExtValue() : INT64_MAX; - bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || - ConvOpts == SetccInGPROpts::SExtInvert; - - if (IsSext && Inputs32Bit) - return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - else if (Inputs32Bit) - return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); - else if (IsSext) - return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); -} - /// Does this node represent a load/store node whose address can be represented /// with a register plus an immediate that's a multiple of \p Val: bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { @@ -3016,8 +3831,18 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { AddrOp = STN->getOperand(2); short Imm = 0; - if (AddrOp.getOpcode() == ISD::ADD) + if (AddrOp.getOpcode() == ISD::ADD) { + // If op0 is a frame index that is under aligned, we can't do it either, + // because it is translated to r31 or r1 + slot + offset. We won't know the + // slot number until the stack frame is finalized. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddrOp.getOperand(0))) { + const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); + unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); + if ((SlotAlign % Val) != 0) + return false; + } return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); + } // If the address comes from the outside, the offset will be zero. return AddrOp.getOpcode() == ISD::CopyFromReg; @@ -3050,22 +3875,20 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitPermutation(N)) return; + // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). 
+ if (tryIntCompareInGPR(N)) + return; + switch (N->getOpcode()) { default: break; case ISD::Constant: if (N->getValueType(0) == MVT::i64) { - ReplaceNode(N, getInt64(CurDAG, N)); + ReplaceNode(N, selectI64Imm(CurDAG, N)); return; } break; - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - if (tryEXTEND(N)) - return; - break; - case ISD::SETCC: if (trySETCC(N)) return; @@ -3209,9 +4032,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::AND: { - if (tryLogicOpOfCompares(N)) - return; - unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; @@ -3331,9 +4151,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitfieldInsert(N)) return; - if (tryLogicOpOfCompares(N)) - return; - int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { @@ -3348,12 +4165,48 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } } + // OR with a 32-bit immediate can be handled by ori + oris + // without creating an immediate in a GPR. + uint64_t Imm64 = 0; + bool IsPPC64 = PPCSubTarget->isPPC64(); + if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && + (Imm64 & ~0xFFFFFFFFuLL) == 0) { + // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. + uint64_t ImmHi = Imm64 >> 16; + uint64_t ImmLo = Imm64 & 0xFFFF; + if (ImmHi != 0 && ImmLo != 0) { + SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + N->getOperand(0), + getI16Imm(ImmLo, dl)); + SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; + CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); + return; + } + } + // Other cases are autogenerated. break; } case ISD::XOR: { - if (tryLogicOpOfCompares(N)) - return; + // XOR with a 32-bit immediate can be handled by xori + xoris + // without creating an immediate in a GPR. + uint64_t Imm64 = 0; + bool IsPPC64 = PPCSubTarget->isPPC64(); + if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && + (Imm64 & ~0xFFFFFFFFuLL) == 0) { + // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. 
+ uint64_t ImmHi = Imm64 >> 16; + uint64_t ImmLo = Imm64 & 0xFFFF; + if (ImmHi != 0 && ImmLo != 0) { + SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, + N->getOperand(0), + getI16Imm(ImmLo, dl)); + SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; + CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); + return; + } + } + break; } case ISD::ADD: { @@ -3666,9 +4519,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // The first source operand is a TargetGlobalAddress or a TargetJumpTable. // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) + // LDtocL(@sym, ADDIStocHA(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) + // ADDItocL(ADDIStocHA(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, |
