Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 1397
1 file changed, 878 insertions, 519 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 30906a32b00c..11c97210ead9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -19,6 +19,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -72,27 +73,6 @@ static cl::opt<bool> UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation")); -// Index into the OpcodesForSpill array. -enum SpillOpcodeKey { - SOK_Int4Spill, - SOK_Int8Spill, - SOK_Float8Spill, - SOK_Float4Spill, - SOK_CRSpill, - SOK_CRBitSpill, - SOK_VRVectorSpill, - SOK_VSXVectorSpill, - SOK_VectorFloat8Spill, - SOK_VectorFloat4Spill, - SOK_VRSaveSpill, - SOK_QuadFloat8Spill, - SOK_QuadFloat4Spill, - SOK_QuadBitSpill, - SOK_SpillToVSR, - SOK_SPESpill, - SOK_LastOpcodeSpill // This must be last on the enum. -}; - // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -225,13 +205,42 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +/// This is an architecture-specific helper function of reassociateOps. +/// Set special operand attributes for new instructions after reassociation. +void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, + MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { + // Propagate FP flags from the original instructions. + // But clear poison-generating flags because those may not be valid now. + uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + NewMI1.setFlags(IntersectedFlags); + NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); + NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); + NewMI1.clearFlag(MachineInstr::MIFlag::IsExact); + + NewMI2.setFlags(IntersectedFlags); + NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap); + NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap); + NewMI2.clearFlag(MachineInstr::MIFlag::IsExact); +} + +void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI, + uint16_t Flags) const { + MI.setFlags(Flags); + MI.clearFlag(MachineInstr::MIFlag::NoSWrap); + MI.clearFlag(MachineInstr::MIFlag::NoUWrap); + MI.clearFlag(MachineInstr::MIFlag::IsExact); +} + // This function does not list all associative and commutative operations, but // only those worth feeding through the machine combiner in an attempt to // reduce the critical path. Mostly, this means floating-point operations, -// because they have high latencies (compared to other operations, such and +// because they have high latencies(>=5) (compared to other operations, such as // and/or, which are also associative and commutative, but have low latencies). 
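For intuition about the payoff here, a minimal standalone sketch (plain C++, no LLVM dependencies, arbitrary values): reassociating a serial chain into a balanced tree halves its depth. Low-part integer multiplies reassociate exactly in modular arithmetic, which is why the fixed-point cases added below need no fast-math flags, while the FP cases also require FmReassoc and FmNsz.

#include <cstdint>
#include <cstdio>

int main() {
  int64_t A = 3, B = 5, C = 7, D = 11;
  // Serial chain: three multiplies, each waiting on the previous result.
  int64_t Serial = ((A * B) * C) * D;
  // Reassociated tree: A*B and C*D are independent and can issue in parallel.
  int64_t Tree = (A * B) * (C * D);
  std::printf("%lld %lld\n", (long long)Serial, (long long)Tree); // 1155 1155
  return 0;
}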
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { + // Floating point: // FP Add: case PPC::FADD: case PPC::FADDS: @@ -258,12 +267,157 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { case PPC::QVFMUL: case PPC::QVFMULS: case PPC::QVFMULSs: + return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && + Inst.getFlag(MachineInstr::MIFlag::FmNsz); + // Fixed point: + // Multiply: + case PPC::MULHD: + case PPC::MULLD: + case PPC::MULHW: + case PPC::MULLW: return true; default: return false; } } +#define InfoArrayIdxFMAInst 0 +#define InfoArrayIdxFAddInst 1 +#define InfoArrayIdxFMULInst 2 +#define InfoArrayIdxAddOpIdx 3 +#define InfoArrayIdxMULOpIdx 4 +// Array keeps info for FMA instructions: +// Index 0 (InfoArrayIdxFMAInst): FMA instruction; +// Index 1 (InfoArrayIdxFAddInst): ADD instruction associated with the FMA; +// Index 2 (InfoArrayIdxFMULInst): MUL instruction associated with the FMA; +// Index 3 (InfoArrayIdxAddOpIdx): ADD operand index in FMA operands; +// Index 4 (InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands; +// the second MUL operand index is this value plus one. +static const uint16_t FMAOpIdxInfo[][5] = { + // FIXME: Add more FMA instructions like XSNMADDADP and so on. + {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2}, + {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2}, + {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2}, + {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2}, + {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1}, + {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}, + {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1}, + {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}}; + +// Check if an opcode is an FMA instruction. If it is, return the index in the +// array FMAOpIdxInfo. Otherwise, return -1. +int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { + for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++) + if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode) + return I; + return -1; + } + +// Try to reassociate FMA chains like below: +// +// Pattern 1: +// A = FADD X, Y (Leaf) +// B = FMA A, M21, M22 (Prev) +// C = FMA B, M31, M32 (Root) +// --> +// A = FMA X, M21, M22 +// B = FMA Y, M31, M32 +// C = FADD A, B +// +// Pattern 2: +// A = FMA X, M11, M12 (Leaf) +// B = FMA A, M21, M22 (Prev) +// C = FMA B, M31, M32 (Root) +// --> +// A = FMUL M11, M12 +// B = FMA X, M21, M22 +// D = FMA A, M31, M32 +// C = FADD B, D +// +// This breaks the dependency between A and B, allowing the FMAs to execute in +// parallel (or back-to-back in a pipeline) instead of depending on each other. +bool PPCInstrInfo::getFMAPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + MachineBasicBlock *MBB = Root.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) { + for (const auto &MO : Instr.explicit_operands()) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + return false; + return true; + }; + + auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx, + bool IsLeaf, bool IsAdd) { + int16_t Idx = -1; + if (!IsAdd) { + Idx = getFMAOpIdxInfo(Instr.getOpcode()); + if (Idx < 0) + return false; + } else if (Instr.getOpcode() != + FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())] + [InfoArrayIdxFAddInst]) + return false; + + // Instruction can be reassociated. + // Fast-math flags may prohibit reassociation.
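// Note that FmReassoc alone would not suffice here: without FmNsz,
// reassociation could change which operand produces a zero result, and
// hence the sign of that zero.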
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) && + Instr.getFlag(MachineInstr::MIFlag::FmNsz))) + return false; + + // Instruction operands are virtual registers for reassociation. + if (!IsAllOpsVirtualReg(Instr)) + return false; + + if (IsAdd && IsLeaf) + return true; + + AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; + + const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx); + MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg()); + // If 'add' operand's def is not in current block, don't do ILP related opt. + if (!MIAdd || MIAdd->getParent() != MBB) + return false; + + // If this is not Leaf FMA Instr, its 'add' operand should only have one use + // as this fma will be changed later. + return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg()); + }; + + int16_t AddOpIdx = -1; + // Root must be a valid FMA like instruction. + if (!IsReassociable(Root, AddOpIdx, false, false)) + return false; + + assert((AddOpIdx >= 0) && "add operand index not right!"); + + Register RegB = Root.getOperand(AddOpIdx).getReg(); + MachineInstr *Prev = MRI.getUniqueVRegDef(RegB); + + // Prev must be a valid FMA like instruction. + AddOpIdx = -1; + if (!IsReassociable(*Prev, AddOpIdx, false, false)) + return false; + + assert((AddOpIdx >= 0) && "add operand index not right!"); + + Register RegA = Prev->getOperand(AddOpIdx).getReg(); + MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA); + AddOpIdx = -1; + if (IsReassociable(*Leaf, AddOpIdx, true, false)) { + Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); + return true; + } + if (IsReassociable(*Leaf, AddOpIdx, true, true)) { + Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); + return true; + } + return false; +} + bool PPCInstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const { @@ -272,16 +426,201 @@ bool PPCInstrInfo::getMachineCombinerPatterns( if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; - // FP reassociation is only legal when we don't need strict IEEE semantics. - if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) - return false; + if (getFMAPatterns(Root, Patterns)) + return true; return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); } +void PPCInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + switch (Pattern) { + case MachineCombinerPattern::REASSOC_XY_AMM_BMM: + case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: + reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg); + break; + default: + // Reassociate default patterns. + TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + break; + } +} + +// Currently, only handle two patterns REASSOC_XY_AMM_BMM and +// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns. 
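The two rewrites can be sanity-checked numerically outside of LLVM. In this minimal standalone sketch (plain C++, arbitrary values; std::fma(M1, M2, A) stands in for FMA A, M1, M2), both patterns compute the same overall sum, just regrouped; in general the regrouping can change FP rounding, which is exactly why the FmReassoc/FmNsz flags are required:

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.5, Y = 2.5, M11 = 2, M12 = 3,
         M21 = 4, M22 = 5, M31 = 6, M32 = 7;
  // Pattern 1 before: A = X + Y; B = FMA A, M21, M22; C = FMA B, M31, M32.
  double P1Before = std::fma(M31, M32, std::fma(M21, M22, X + Y));
  // Pattern 1 after:  A = FMA X, M21, M22; B = FMA Y, M31, M32; C = FADD A, B.
  double P1After = std::fma(M21, M22, X) + std::fma(M31, M32, Y);
  // Pattern 2 before: A = FMA X, M11, M12; B = FMA A, M21, M22;
  //                   C = FMA B, M31, M32.
  double P2Before =
      std::fma(M31, M32, std::fma(M21, M22, std::fma(M11, M12, X)));
  // Pattern 2 after:  A = FMUL M11, M12; B = FMA X, M21, M22;
  //                   D = FMA A, M31, M32; C = FADD B, D.
  double P2After = std::fma(M21, M22, X) + std::fma(M31, M32, M11 * M12);
  std::printf("%g %g | %g %g\n", P1Before, P1After, P2Before, P2After);
  return 0; // prints: 66 66 | 69.5 69.5
}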
+void PPCInstrInfo::reassociateFMA( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getMF(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineOperand &OpC = Root.getOperand(0); + Register RegC = OpC.getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(RegC); + MRI.constrainRegClass(RegC, RC); + + unsigned FmaOp = Root.getOpcode(); + int16_t Idx = getFMAOpIdxInfo(FmaOp); + assert(Idx >= 0 && "Root must be a FMA instruction"); + + uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; + uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; + MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); + MachineInstr *Leaf = + MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); + uint16_t IntersectedFlags = + Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); + + auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg, + bool &KillFlag) { + Reg = Operand.getReg(); + MRI.constrainRegClass(Reg, RC); + KillFlag = Operand.isKill(); + }; + + auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1, + Register &MulOp2, bool &MulOp1KillFlag, + bool &MulOp2KillFlag) { + GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag); + GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag); + }; + + Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32; + bool KillX = false, KillY = false, KillM11 = false, KillM12 = false, + KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false; + + GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32); + GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22); + + if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { + GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12); + GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); + } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { + GetOperandInfo(Leaf->getOperand(1), RegX, KillX); + GetOperandInfo(Leaf->getOperand(2), RegY, KillY); + } + + // Create new virtual registers for the new results instead of + // recycling legacy ones because the MachineCombiner's computation of the + // critical path requires a new register definition rather than an existing + // one. + Register NewVRA = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0)); + + Register NewVRB = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); + + Register NewVRD = 0; + if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { + NewVRD = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2)); + } + + auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd, + Register RegMul1, bool KillRegMul1, + Register RegMul2, bool KillRegMul2) { + MI->getOperand(AddOpIdx).setReg(RegAdd); + MI->getOperand(AddOpIdx).setIsKill(KillAdd); + MI->getOperand(FirstMulOpIdx).setReg(RegMul1); + MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1); + MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2); + MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2); + }; + + if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { + // Create new instructions for insertion. 
+ MachineInstrBuilder MINewB = + BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegM21, getKillRegState(KillM21)) + .addReg(RegM22, getKillRegState(KillM22)); + MachineInstrBuilder MINewA = + BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) + .addReg(RegY, getKillRegState(KillY)) + .addReg(RegM31, getKillRegState(KillM31)) + .addReg(RegM32, getKillRegState(KillM32)); + // If AddOpIdx is not 1, adjust the order. + if (AddOpIdx != 1) { + AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); + AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32); + } + + MachineInstrBuilder MINewC = + BuildMI(*MF, Root.getDebugLoc(), + get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC) + .addReg(NewVRB, getKillRegState(true)) + .addReg(NewVRA, getKillRegState(true)); + + // Update flags for newly created instructions. + setSpecialOperandAttr(*MINewA, IntersectedFlags); + setSpecialOperandAttr(*MINewB, IntersectedFlags); + setSpecialOperandAttr(*MINewC, IntersectedFlags); + + // Record new instructions for insertion. + InsInstrs.push_back(MINewA); + InsInstrs.push_back(MINewB); + InsInstrs.push_back(MINewC); + } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { + assert(NewVRD && "new FMA register not created!"); + // Create new instructions for insertion. + MachineInstrBuilder MINewA = + BuildMI(*MF, Leaf->getDebugLoc(), + get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA) + .addReg(RegM11, getKillRegState(KillM11)) + .addReg(RegM12, getKillRegState(KillM12)); + MachineInstrBuilder MINewB = + BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegM21, getKillRegState(KillM21)) + .addReg(RegM22, getKillRegState(KillM22)); + MachineInstrBuilder MINewD = + BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD) + .addReg(NewVRA, getKillRegState(true)) + .addReg(RegM31, getKillRegState(KillM31)) + .addReg(RegM32, getKillRegState(KillM32)); + // If AddOpIdx is not 1, adjust the order. + if (AddOpIdx != 1) { + AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22); + AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32, + KillM32); + } + + MachineInstrBuilder MINewC = + BuildMI(*MF, Root.getDebugLoc(), + get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC) + .addReg(NewVRB, getKillRegState(true)) + .addReg(NewVRD, getKillRegState(true)); + + // Update flags for newly created instructions. + setSpecialOperandAttr(*MINewA, IntersectedFlags); + setSpecialOperandAttr(*MINewB, IntersectedFlags); + setSpecialOperandAttr(*MINewD, IntersectedFlags); + setSpecialOperandAttr(*MINewC, IntersectedFlags); + + // Record new instructions for insertion. + InsInstrs.push_back(MINewA); + InsInstrs.push_back(MINewB); + InsInstrs.push_back(MINewD); + InsInstrs.push_back(MINewC); + } + + assert(!InsInstrs.empty() && + "Insertion instructions set should not be empty!"); + + // Record old instructions for deletion. + DelInstrs.push_back(Leaf); + DelInstrs.push_back(Prev); + DelInstrs.push_back(&Root); +} + // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, + Register &SrcReg, Register &DstReg, unsigned &SubIdx) const { switch (MI.getOpcode()) { default: return false; @@ -753,9 +1092,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, // Select analysis. 
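For orientation in the select-analysis code that follows: the isel instruction it targets is simply a CR-bit-controlled register pick. A simplified model (plain C++; it deliberately ignores that hardware isel reads its first input as (RA|0), i.e. r0 means the literal 0, which is the reason insertSelect copies a first input that might land in r0 into another register class):

#include <cstdint>

// Rough semantics of "isel rD, rA, rB, crb".
static uint64_t IselModel(bool CRBit, uint64_t RA, uint64_t RB) {
  return CRBit ? RA : RB;
}

int main() { return IselModel(true, 1, 2) == 1 ? 0 : 1; }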
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef<MachineOperand> Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, int &FalseCycles) const { + ArrayRef<MachineOperand> Cond, + Register DstReg, Register TrueReg, + Register FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { if (Cond.size() != 2) return false; @@ -791,9 +1131,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const DebugLoc &dl, unsigned DestReg, - ArrayRef<MachineOperand> Cond, unsigned TrueReg, - unsigned FalseReg) const { + const DebugLoc &dl, Register DestReg, + ArrayRef<MachineOperand> Cond, Register TrueReg, + Register FalseReg) const { assert(Cond.size() == 2 && "PPC branch conditions have two components!"); @@ -852,7 +1192,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } - unsigned FirstReg = SwapOps ? FalseReg : TrueReg, + Register FirstReg = SwapOps ? FalseReg : TrueReg, SecondReg = SwapOps ? TrueReg : FalseReg; // The first input register of isel cannot be r0. If it is a member @@ -863,7 +1203,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, const TargetRegisterClass *FirstRC = MRI.getRegClass(FirstReg)->contains(PPC::X0) ? &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass; - unsigned OldFirstReg = FirstReg; + Register OldFirstReg = FirstReg; FirstReg = MRI.createVirtualRegister(FirstRC); BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) .addReg(OldFirstReg); @@ -1024,183 +1364,66 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } -unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, - const TargetRegisterClass *RC) - const { - const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); +static unsigned getSpillIndex(const TargetRegisterClass *RC) { int OpcodeIndex = 0; - if (RC != nullptr) { - if (PPC::GPRCRegClass.hasSubClassEq(RC) || - PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Int4Spill; - } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || - PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Int8Spill; - } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Float8Spill; - } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Float4Spill; - } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPESpill; - } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_CRSpill; - } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_CRBitSpill; - } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VRVectorSpill; - } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VSXVectorSpill; - } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VectorFloat8Spill; - } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VectorFloat4Spill; - } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VRSaveSpill; - } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadFloat8Spill; - } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadFloat4Spill; - } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadBitSpill; - } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SpillToVSR; - 
} else { - llvm_unreachable("Unknown regclass!"); - } + if (PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_Int4Spill; + } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_Int8Spill; + } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_Float8Spill; + } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_Float4Spill; + } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_SPESpill; + } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_CRSpill; + } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_CRBitSpill; + } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_VRVectorSpill; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_VSXVectorSpill; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_VectorFloat8Spill; + } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_VectorFloat4Spill; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_VRSaveSpill; + } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_QuadFloat8Spill; + } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_QuadFloat4Spill; + } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_QuadBitSpill; + } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_SpillToVSR; } else { - if (PPC::GPRCRegClass.contains(Reg) || - PPC::GPRC_NOR0RegClass.contains(Reg)) { - OpcodeIndex = SOK_Int4Spill; - } else if (PPC::G8RCRegClass.contains(Reg) || - PPC::G8RC_NOX0RegClass.contains(Reg)) { - OpcodeIndex = SOK_Int8Spill; - } else if (PPC::F8RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_Float8Spill; - } else if (PPC::F4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_Float4Spill; - } else if (PPC::SPERCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPESpill; - } else if (PPC::CRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_CRSpill; - } else if (PPC::CRBITRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_CRBitSpill; - } else if (PPC::VRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VRVectorSpill; - } else if (PPC::VSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VSXVectorSpill; - } else if (PPC::VSFRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VectorFloat8Spill; - } else if (PPC::VSSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VectorFloat4Spill; - } else if (PPC::VRSAVERCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VRSaveSpill; - } else if (PPC::QFRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadFloat8Spill; - } else if (PPC::QSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadFloat4Spill; - } else if (PPC::QBRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadBitSpill; - } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SpillToVSR; - } else { - llvm_unreachable("Unknown regclass!"); - } + llvm_unreachable("Unknown regclass!"); } - return OpcodesForSpill[OpcodeIndex]; + return OpcodeIndex; } unsigned -PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, - const TargetRegisterClass *RC) const { - const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); - int OpcodeIndex = 0; +PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const { + const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray(); + return OpcodesForSpill[getSpillIndex(RC)]; +} - if (RC != nullptr) { - if (PPC::GPRCRegClass.hasSubClassEq(RC) 
|| - PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Int4Spill; - } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || - PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Int8Spill; - } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Float8Spill; - } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_Float4Spill; - } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPESpill; - } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_CRSpill; - } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_CRBitSpill; - } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VRVectorSpill; - } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VSXVectorSpill; - } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VectorFloat8Spill; - } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VectorFloat4Spill; - } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_VRSaveSpill; - } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadFloat8Spill; - } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadFloat4Spill; - } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_QuadBitSpill; - } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SpillToVSR; - } else { - llvm_unreachable("Unknown regclass!"); - } - } else { - if (PPC::GPRCRegClass.contains(Reg) || - PPC::GPRC_NOR0RegClass.contains(Reg)) { - OpcodeIndex = SOK_Int4Spill; - } else if (PPC::G8RCRegClass.contains(Reg) || - PPC::G8RC_NOX0RegClass.contains(Reg)) { - OpcodeIndex = SOK_Int8Spill; - } else if (PPC::F8RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_Float8Spill; - } else if (PPC::F4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_Float4Spill; - } else if (PPC::SPERCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPESpill; - } else if (PPC::CRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_CRSpill; - } else if (PPC::CRBITRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_CRBitSpill; - } else if (PPC::VRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VRVectorSpill; - } else if (PPC::VSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VSXVectorSpill; - } else if (PPC::VSFRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VectorFloat8Spill; - } else if (PPC::VSSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VectorFloat4Spill; - } else if (PPC::VRSAVERCRegClass.contains(Reg)) { - OpcodeIndex = SOK_VRSaveSpill; - } else if (PPC::QFRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadFloat8Spill; - } else if (PPC::QSRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadFloat4Spill; - } else if (PPC::QBRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_QuadBitSpill; - } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SpillToVSR; - } else { - llvm_unreachable("Unknown regclass!"); - } - } - return OpcodesForSpill[OpcodeIndex]; +unsigned +PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const { + const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray(); + return OpcodesForSpill[getSpillIndex(RC)]; } void PPCInstrInfo::StoreRegToStackSlot( MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr *> &NewMIs) const { - unsigned Opcode = getStoreOpcodeForSpill(PPC::NoRegister, RC); + unsigned Opcode = getStoreOpcodeForSpill(RC); DebugLoc DL; PPCFunctionInfo *FuncInfo = 
MF.getInfo<PPCFunctionInfo>(); @@ -1221,24 +1444,13 @@ void PPCInstrInfo::StoreRegToStackSlot( FuncInfo->setHasNonRISpills(); } -void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void PPCInstrInfo::storeRegToStackSlotNoUpd( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr *, 4> NewMIs; - // We need to avoid a situation in which the value from a VRRC register is - // spilled using an Altivec instruction and reloaded into a VSRC register - // using a VSX instruction. The issue with this is that the VSX - // load/store instructions swap the doublewords in the vector and the Altivec - // ones don't. The register classes on the spill/reload may be different if - // the register is defined using an Altivec instruction and is then used by a - // VSX instruction. - RC = updatedRC(RC); - StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) @@ -1248,16 +1460,33 @@ void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MFI.getObjectAlign(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } +void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + // We need to avoid a situation in which the value from a VRRC register is + // spilled using an Altivec instruction and reloaded into a VSRC register + // using a VSX instruction. The issue with this is that the VSX + // load/store instructions swap the doublewords in the vector and the Altivec + // ones don't. The register classes on the spill/reload may be different if + // the register is defined using an Altivec instruction and is then used by a + // VSX instruction. 
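// Concretely: the VSX spill/reload opcodes (e.g. stxvd2x/lxvd2x) access the
// two doublewords in swapped order relative to the Altivec ones (stvx/lvx),
// so a value spilled with one family and reloaded with the other would come
// back with its doublewords permuted; updatedRC() widens VRRC to VSRC so the
// spill and the reload always agree.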
+ RC = updatedRC(RC); + storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI); +} + void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr *> &NewMIs) const { - unsigned Opcode = getLoadOpcodeForSpill(PPC::NoRegister, RC); + unsigned Opcode = getLoadOpcodeForSpill(RC); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg), FrameIdx)); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); @@ -1273,12 +1502,10 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, FuncInfo->setHasNonRISpills(); } -void -PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void PPCInstrInfo::loadRegFromStackSlotNoUpd( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; @@ -1287,16 +1514,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setHasSpills(); - // We need to avoid a situation in which the value from a VRRC register is - // spilled using an Altivec instruction and reloaded into a VSRC register - // using a VSX instruction. The issue with this is that the VSX - // load/store instructions swap the doublewords in the vector and the Altivec - // ones don't. The register classes on the spill/reload may be different if - // the register is defined using an Altivec instruction and is then used by a - // VSX instruction. - if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) - RC = &PPC::VSRCRegClass; - LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) @@ -1306,10 +1523,27 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MFI.getObjectAlign(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } +void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + // We need to avoid a situation in which the value from a VRRC register is + // spilled using an Altivec instruction and reloaded into a VSRC register + // using a VSX instruction. The issue with this is that the VSX + // load/store instructions swap the doublewords in the vector and the Altivec + // ones don't. The register classes on the spill/reload may be different if + // the register is defined using an Altivec instruction and is then used by a + // VSX instruction. 
+ RC = updatedRC(RC); + + loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI); +} + bool PPCInstrInfo:: reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); @@ -1321,9 +1555,11 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const { - // For some instructions, it is legal to fold ZERO into the RA register field. +// For some instructions, it is legal to fold ZERO into the RA register field. +// This function performs that fold by replacing the operand with PPC::ZERO; it +// does not check whether the defining load-immediate of zero is left unused. +bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg) const { // A zero immediate should always be loaded with a single li. unsigned DefOpc = DefMI.getOpcode(); if (DefOpc != PPC::LI && DefOpc != PPC::LI8) @@ -1343,6 +1579,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (UseMCID.isPseudo()) return false; + // We need to find which of the User's operands is to be folded; that will be + // the operand that matches the given register ID. unsigned UseIdx; for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx) if (UseMI.getOperand(UseIdx).isReg() && @@ -1371,7 +1609,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (UseInfo->Constraints != 0) return false; - unsigned ZeroReg; + MCRegister ZeroReg; if (UseInfo->isLookupPtrRegClass()) { bool isPPC64 = Subtarget.isPPC64(); ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; @@ -1380,13 +1618,19 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, PPC::ZERO8 : PPC::ZERO; } - bool DeleteDef = MRI->hasOneNonDBGUse(Reg); UseMI.getOperand(UseIdx).setReg(ZeroReg); + return true; +} - if (DeleteDef) +// Folds zero into instructions whose operand is fed by a load-immediate of +// zero and which also treat the ZERO register as an immediate zero. If the +// defining load-immediate ends up with no more users, it is deleted. +bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + Register Reg, MachineRegisterInfo *MRI) const { + bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg); + if (MRI->use_nodbg_empty(Reg)) DefMI.eraseFromParent(); - - return true; + return Changed; } static bool MBBDefinesCTR(MachineBasicBlock &MBB) { @@ -1423,17 +1667,6 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { return false; } -bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { - if (!MI.isTerminator()) - return false; - - // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier()) - return true; - - return !isPredicated(MI); -} - bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { unsigned OpC = MI.getOpcode(); @@ -1587,8 +1820,8 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI, return Found; } -bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, - unsigned &SrcReg2, int &Mask, +bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, + Register &SrcReg2, int &Mask, int &Value) const { unsigned Opc = MI.getOpcode(); @@ -1617,8 +1850,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, } } -bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, - unsigned SrcReg2, int Mask, int Value, +bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, - Register SrcReg2, int Mask, int Value, + Register SrcReg2, int Mask, int Value, const MachineRegisterInfo *MRI) const { if (DisableCmpOpt) return false; @@ -1646,8 +1879,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; // Look through copies unless that gets us to a physical register. - unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); - if (Register::isVirtualRegister(ActualSrc)) + Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); + if (ActualSrc.isVirtual()) SrcReg = ActualSrc; // Get the unique definition of SrcReg. @@ -2036,8 +2269,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { static const std::pair<unsigned, const char *> TargetFlags[] = { {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, - {MO_NLP_FLAG, "ppc-nlp"}, - {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}}; + {MO_PCREL_FLAG, "ppc-pcrel"}, + {MO_GOT_FLAG, "ppc-got"}}; return makeArrayRef(TargetFlags); } @@ -2330,7 +2563,8 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); const TargetRegisterInfo *TRI = &getRegisterInfo(); // If we're in SSA, get the defs through the MRI. Otherwise, only look - // within the basic block to see if the register is defined using an LI/LI8. + // within the basic block to see if the register is defined using an + // LI/LI8/ADDI/ADDI8. if (MRI->isSSA()) { for (int i = 1, e = MI.getNumOperands(); i < e; i++) { if (!MI.getOperand(i).isReg()) continue; @@ -2341,9 +2575,16 @@ unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI); if (Register::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); - if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { + if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 || + DefMI->getOpcode() == PPC::ADDI || + DefMI->getOpcode() == PPC::ADDI8) { OpNoForForwarding = i; - break; + // Both an ADDI and an LI operand may exist in the same instruction at + // the same time. We prefer to fold the LI operand, since an LI has + // only one Imm operand and is more likely to be converted. So if the + // current DefMI is ADDI/ADDI8, we keep looking for a possible LI/LI8. + if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) + break; } } } @@ -2400,44 +2641,20 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( return OpNoForForwarding == ~0U ? nullptr : DefMI; } +unsigned PPCInstrInfo::getSpillTarget() const { + return Subtarget.hasP9Vector() ?
1 : 0; +} + const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { - static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = { - // Power 8 - {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, - PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, - PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, - PPC::SPILLTOVSR_ST, PPC::EVSTDD}, - // Power 9 - {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, - PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, - PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, - PPC::SPILLTOVSR_ST}}; - - return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0]; + return StoreSpillOpcodesArray[getSpillTarget()]; } const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { - static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = { - // Power 8 - {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, - PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, - PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, - PPC::SPILLTOVSR_LD, PPC::EVLDD}, - // Power 9 - {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, - PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32, - PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, - PPC::SPILLTOVSR_LD}}; - - return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0]; + return LoadSpillOpcodesArray[getSpillTarget()]; } void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, unsigned RegNo) const { - const MachineRegisterInfo &MRI = - StartMI.getParent()->getParent()->getRegInfo(); - if (MRI.isSSA()) - return; // Instructions between [StartMI, EndMI] should be in same basic block. assert((StartMI.getParent() == EndMI.getParent()) && @@ -2588,6 +2805,13 @@ bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const { return true; return false; }; + + // We are trying to replace the ImmOpNo operand with ScaleReg. Give up if + // ScaleReg is the R0/X0 register, since it is then treated as a special + // zero. + if (III.ZeroIsSpecialOrig == III.ImmOpNo && + (ScaleReg == PPC::R0 || ScaleReg == PPC::X0)) + return false; + // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr // and Imm Instr. if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI)) @@ -2631,6 +2855,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, if (Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; + // The operand may not necessarily be an immediate; it could be a relocation. + if (!ADDIMI.getOperand(2).isImm()) + return false; + Imm = ADDIMI.getOperand(2).getImm(); return true; @@ -2746,10 +2974,16 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, "The forwarding operand needs to be valid at this point"); bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; - Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; + // If this is an imm-form instruction and its register operand is produced + // by an ADDI, fold the ADDI's immediate into this instruction directly. + if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) != + PPC::INSTRUCTION_LIST_END && + transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand)) + return true; + ImmInstrInfo III; bool IsVFReg = MI.getOperand(0).isReg() ?
isVFRegister(MI.getOperand(0).getReg()) @@ -2763,228 +2997,17 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, KillFwdDefMI)) return true; - if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) || - !DefMI->getOperand(1).isImm()) - return false; - - int64_t Immediate = DefMI->getOperand(1).getImm(); - // Sign-extend to 64-bits. - int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? - (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; - // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by LI, convert it now. - if (HasImmForm) - return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm); - - bool ReplaceWithLI = false; - bool Is64BitLI = false; - int64_t NewImm = 0; - bool SetCR = false; - unsigned Opc = MI.getOpcode(); - switch (Opc) { - default: return false; - - // FIXME: Any branches conditional on such a comparison can be made - // unconditional. At this time, this happens too infrequently to be worth - // the implementation effort, but if that ever changes, we could convert - // such a pattern here. - case PPC::CMPWI: - case PPC::CMPLWI: - case PPC::CMPDI: - case PPC::CMPLDI: { - // Doing this post-RA would require dataflow analysis to reliably find uses - // of the CR register set by the compare. - // No need to fixup killed/dead flag since this transformation is only valid - // before RA. - if (PostRA) - return false; - // If a compare-immediate is fed by an immediate and is itself an input of - // an ISEL (the most common case) into a COPY of the correct register. - bool Changed = false; - Register DefReg = MI.getOperand(0).getReg(); - int64_t Comparand = MI.getOperand(2).getImm(); - int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? - (Comparand | 0xFFFFFFFFFFFF0000) : Comparand; - - for (auto &CompareUseMI : MRI->use_instructions(DefReg)) { - unsigned UseOpc = CompareUseMI.getOpcode(); - if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) - continue; - unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); - Register TrueReg = CompareUseMI.getOperand(1).getReg(); - Register FalseReg = CompareUseMI.getOperand(2).getReg(); - unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, - FalseReg, CRSubReg); - if (RegToCopy == PPC::NoRegister) - continue; - // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. - if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { - CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); - replaceInstrOperandWithImm(CompareUseMI, 1, 0); - CompareUseMI.RemoveOperand(3); - CompareUseMI.RemoveOperand(2); - continue; - } - LLVM_DEBUG( - dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n"); - LLVM_DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump()); - LLVM_DEBUG(dbgs() << "Is converted to:\n"); - // Convert to copy and remove unneeded operands. - CompareUseMI.setDesc(get(PPC::COPY)); - CompareUseMI.RemoveOperand(3); - CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1); - CmpIselsConverted++; - Changed = true; - LLVM_DEBUG(CompareUseMI.dump()); - } - if (Changed) - return true; - // This may end up incremented multiple times since this function is called - // during a fixed-point transformation, but it is only meant to indicate the - // presence of this opportunity. - MissedConvertibleImmediateInstrs++; - return false; - } - - // Immediate forms - may simply be convertable to an LI. - case PPC::ADDI: - case PPC::ADDI8: { - // Does the sum fit in a 16-bit signed field? 
- int64_t Addend = MI.getOperand(2).getImm(); - if (isInt<16>(Addend + SExtImm)) { - ReplaceWithLI = true; - Is64BitLI = Opc == PPC::ADDI8; - NewImm = Addend + SExtImm; - break; - } - return false; - } - case PPC::RLDICL: - case PPC::RLDICL_rec: - case PPC::RLDICL_32: - case PPC::RLDICL_32_64: { - // Use APInt's rotate function. - int64_t SH = MI.getOperand(2).getImm(); - int64_t MB = MI.getOperand(3).getImm(); - APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32, - SExtImm, true); - InVal = InVal.rotl(SH); - uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; - InVal &= Mask; - // Can't replace negative values with an LI as that will sign-extend - // and not clear the left bits. If we're setting the CR bit, we will use - // ANDI_rec which won't sign extend, so that's safe. - if (isUInt<15>(InVal.getSExtValue()) || - (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) { - ReplaceWithLI = true; - Is64BitLI = Opc != PPC::RLDICL_32; - NewImm = InVal.getSExtValue(); - SetCR = Opc == PPC::RLDICL_rec; - break; - } - return false; - } - case PPC::RLWINM: - case PPC::RLWINM8: - case PPC::RLWINM_rec: - case PPC::RLWINM8_rec: { - int64_t SH = MI.getOperand(2).getImm(); - int64_t MB = MI.getOperand(3).getImm(); - int64_t ME = MI.getOperand(4).getImm(); - APInt InVal(32, SExtImm, true); - InVal = InVal.rotl(SH); - // Set the bits ( MB + 32 ) to ( ME + 32 ). - uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); - InVal &= Mask; - // Can't replace negative values with an LI as that will sign-extend - // and not clear the left bits. If we're setting the CR bit, we will use - // ANDI_rec which won't sign extend, so that's safe. - bool ValueFits = isUInt<15>(InVal.getSExtValue()); - ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) && - isUInt<16>(InVal.getSExtValue())); - if (ValueFits) { - ReplaceWithLI = true; - Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; - NewImm = InVal.getSExtValue(); - SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec; - break; - } - return false; - } - case PPC::ORI: - case PPC::ORI8: - case PPC::XORI: - case PPC::XORI8: { - int64_t LogicalImm = MI.getOperand(2).getImm(); - int64_t Result = 0; - if (Opc == PPC::ORI || Opc == PPC::ORI8) - Result = LogicalImm | SExtImm; - else - Result = LogicalImm ^ SExtImm; - if (isInt<16>(Result)) { - ReplaceWithLI = true; - Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8; - NewImm = Result; - break; - } - return false; - } - } - - if (ReplaceWithLI) { - // We need to be careful with CR-setting instructions we're replacing. - if (SetCR) { - // We don't know anything about uses when we're out of SSA, so only - // replace if the new immediate will be reproduced. - bool ImmChanged = (SExtImm & NewImm) != NewImm; - if (PostRA && ImmChanged) - return false; - - if (!PostRA) { - // If the defining load-immediate has no other uses, we can just replace - // the immediate with the new immediate. - if (MRI->hasOneUse(DefMI->getOperand(0).getReg())) - DefMI->getOperand(1).setImm(NewImm); - - // If we're not using the GPR result of the CR-setting instruction, we - // just need to and with zero/non-zero depending on the new immediate. 
- else if (MRI->use_empty(MI.getOperand(0).getReg())) { - if (NewImm) { - assert(Immediate && "Transformation converted zero to non-zero?"); - NewImm = Immediate; - } - } - else if (ImmChanged) - return false; - } - } - - LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); - LLVM_DEBUG(MI.dump()); - LLVM_DEBUG(dbgs() << "Fed by:\n"); - LLVM_DEBUG(DefMI->dump()); - LoadImmediateInfo LII; - LII.Imm = NewImm; - LII.Is64Bit = Is64BitLI; - LII.SetCR = SetCR; - // If we're setting the CR, the original load-immediate must be kept (as an - // operand to ANDI_rec/ANDI8_rec). - if (KilledDef && SetCR) - *KilledDef = nullptr; - replaceInstrWithLI(MI, LII); - - // Fixup killed/dead flag after transformation. - // Pattern: - // ForwardingOperandReg = LI imm1 - // y = op2 imm2, ForwardingOperandReg(killed) - if (IsForwardingOperandKilled) - fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg) + if (HasImmForm && + transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI)) + return true; - LLVM_DEBUG(dbgs() << "With:\n"); - LLVM_DEBUG(MI.dump()); + // If this is not a reg+reg form but DefMI is an LI/LI8, check whether the + // user MI can be simplified to an LI. + if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef)) return true; - } + return false; } @@ -3501,6 +3524,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, RegMO = &DefMI.getOperand(1); ImmMO = &DefMI.getOperand(2); + // Before RA, the first ADDI operand could be a frame index. + if (!RegMO->isReg()) + return false; + // This DefMI is eligible for forwarding if it is: // 1. add inst // 2. one of the operands is Imm/CPI/Global. @@ -3549,7 +3576,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding( bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, const MachineInstr &DefMI, const ImmInstrInfo &III, - int64_t &Imm) const { + int64_t &Imm, + int64_t BaseImm) const { assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate"); if (DefMI.getOpcode() == PPC::ADDItocL) { // The operand for ADDItocL is CPI, which isn't an imm at compile time, @@ -3563,19 +3591,21 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, // not just an immediate but also a multiple of 4, or 16 depending on the // load. A DForm load cannot be represented if it is a multiple of say 2. // XForm loads do not have this restriction. - if (ImmMO.isGlobal() && - ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf) - return false; + if (ImmMO.isGlobal()) { + const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout(); + if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf) + return false; + } return true; } if (ImmMO.isImm()) { // It is an Imm; we need to check whether it fits the range. - int64_t Immediate = ImmMO.getImm(); // Sign-extend to 64-bits. - Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? - (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; + // DefMI may be folded with another imm-form instruction; the resulting Imm + // is the sum of DefMI's Imm and BaseImm, which comes from the imm-form + // instruction.
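// For example (hypothetical registers and values): a DefMI of "addi x, y, 100"
// feeding "lwz r, 4(x)" can be folded to "lwz r, 104(y)"; BaseImm is the 4,
// and the combined displacement computed below must still be a valid
// sign-extended 16-bit immediate and respect ImmMustBeMultipleOf.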
+ Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm); if (Imm % III.ImmMustBeMultipleOf) return false; @@ -3599,6 +3629,328 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, return true; } +bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI, + unsigned OpNoForForwarding, + MachineInstr **KilledDef) const { + if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || + !DefMI.getOperand(1).isImm()) + return false; + + MachineFunction *MF = MI.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + bool PostRA = !MRI->isSSA(); + + int64_t Immediate = DefMI.getOperand(1).getImm(); + // Sign-extend to 64-bits. + int64_t SExtImm = SignExtend64<16>(Immediate); + + bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill(); + Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg(); + + bool ReplaceWithLI = false; + bool Is64BitLI = false; + int64_t NewImm = 0; + bool SetCR = false; + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: + return false; + + // FIXME: Any branches conditional on such a comparison can be made + // unconditional. At this time, this happens too infrequently to be worth + // the implementation effort, but if that ever changes, we could convert + // such a pattern here. + case PPC::CMPWI: + case PPC::CMPLWI: + case PPC::CMPDI: + case PPC::CMPLDI: { + // Doing this post-RA would require dataflow analysis to reliably find uses + // of the CR register set by the compare. + // No need to fixup killed/dead flag since this transformation is only valid + // before RA. + if (PostRA) + return false; + // If a compare-immediate is fed by an immediate and is itself an input of + // an ISEL (the most common case) into a COPY of the correct register. + bool Changed = false; + Register DefReg = MI.getOperand(0).getReg(); + int64_t Comparand = MI.getOperand(2).getImm(); + int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 + ? (Comparand | 0xFFFFFFFFFFFF0000) + : Comparand; + + for (auto &CompareUseMI : MRI->use_instructions(DefReg)) { + unsigned UseOpc = CompareUseMI.getOpcode(); + if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) + continue; + unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); + Register TrueReg = CompareUseMI.getOperand(1).getReg(); + Register FalseReg = CompareUseMI.getOperand(2).getReg(); + unsigned RegToCopy = + selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); + if (RegToCopy == PPC::NoRegister) + continue; + // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. + if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { + CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); + replaceInstrOperandWithImm(CompareUseMI, 1, 0); + CompareUseMI.RemoveOperand(3); + CompareUseMI.RemoveOperand(2); + continue; + } + LLVM_DEBUG( + dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n"); + LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump()); + LLVM_DEBUG(dbgs() << "Is converted to:\n"); + // Convert to copy and remove unneeded operands. + CompareUseMI.setDesc(get(PPC::COPY)); + CompareUseMI.RemoveOperand(3); + CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1); + CmpIselsConverted++; + Changed = true; + LLVM_DEBUG(CompareUseMI.dump()); + } + if (Changed) + return true; + // This may end up incremented multiple times since this function is called + // during a fixed-point transformation, but it is only meant to indicate the + // presence of this opportunity. 
+ MissedConvertibleImmediateInstrs++; + return false; + } + + // Immediate forms - may simply be convertable to an LI. + case PPC::ADDI: + case PPC::ADDI8: { + // Does the sum fit in a 16-bit signed field? + int64_t Addend = MI.getOperand(2).getImm(); + if (isInt<16>(Addend + SExtImm)) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::ADDI8; + NewImm = Addend + SExtImm; + break; + } + return false; + } + case PPC::RLDICL: + case PPC::RLDICL_rec: + case PPC::RLDICL_32: + case PPC::RLDICL_32_64: { + // Use APInt's rotate function. + int64_t SH = MI.getOperand(2).getImm(); + int64_t MB = MI.getOperand(3).getImm(); + APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32, + SExtImm, true); + InVal = InVal.rotl(SH); + uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1; + InVal &= Mask; + // Can't replace negative values with an LI as that will sign-extend + // and not clear the left bits. If we're setting the CR bit, we will use + // ANDI_rec which won't sign extend, so that's safe. + if (isUInt<15>(InVal.getSExtValue()) || + (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) { + ReplaceWithLI = true; + Is64BitLI = Opc != PPC::RLDICL_32; + NewImm = InVal.getSExtValue(); + SetCR = Opc == PPC::RLDICL_rec; + break; + } + return false; + } + case PPC::RLWINM: + case PPC::RLWINM8: + case PPC::RLWINM_rec: + case PPC::RLWINM8_rec: { + int64_t SH = MI.getOperand(2).getImm(); + int64_t MB = MI.getOperand(3).getImm(); + int64_t ME = MI.getOperand(4).getImm(); + APInt InVal(32, SExtImm, true); + InVal = InVal.rotl(SH); + APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB); + InVal &= Mask; + // Can't replace negative values with an LI as that will sign-extend + // and not clear the left bits. If we're setting the CR bit, we will use + // ANDI_rec which won't sign extend, so that's safe. + bool ValueFits = isUInt<15>(InVal.getSExtValue()); + ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) && + isUInt<16>(InVal.getSExtValue())); + if (ValueFits) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; + NewImm = InVal.getSExtValue(); + SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec; + break; + } + return false; + } + case PPC::ORI: + case PPC::ORI8: + case PPC::XORI: + case PPC::XORI8: { + int64_t LogicalImm = MI.getOperand(2).getImm(); + int64_t Result = 0; + if (Opc == PPC::ORI || Opc == PPC::ORI8) + Result = LogicalImm | SExtImm; + else + Result = LogicalImm ^ SExtImm; + if (isInt<16>(Result)) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8; + NewImm = Result; + break; + } + return false; + } + } + + if (ReplaceWithLI) { + // We need to be careful with CR-setting instructions we're replacing. + if (SetCR) { + // We don't know anything about uses when we're out of SSA, so only + // replace if the new immediate will be reproduced. + bool ImmChanged = (SExtImm & NewImm) != NewImm; + if (PostRA && ImmChanged) + return false; + + if (!PostRA) { + // If the defining load-immediate has no other uses, we can just replace + // the immediate with the new immediate. + if (MRI->hasOneUse(DefMI.getOperand(0).getReg())) + DefMI.getOperand(1).setImm(NewImm); + + // If we're not using the GPR result of the CR-setting instruction, we + // just need to and with zero/non-zero depending on the new immediate. 
+ else if (MRI->use_empty(MI.getOperand(0).getReg())) { + if (NewImm) { + assert(Immediate && "Transformation converted zero to non-zero?"); + NewImm = Immediate; + } + } else if (ImmChanged) + return false; + } + } + + LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Fed by:\n"); + LLVM_DEBUG(DefMI.dump()); + LoadImmediateInfo LII; + LII.Imm = NewImm; + LII.Is64Bit = Is64BitLI; + LII.SetCR = SetCR; + // If we're setting the CR, the original load-immediate must be kept (as an + // operand to ANDI_rec/ANDI8_rec). + if (KilledDef && SetCR) + *KilledDef = nullptr; + replaceInstrWithLI(MI, LII); + + // Fixup killed/dead flag after transformation. + // Pattern: + // ForwardingOperandReg = LI imm1 + // y = op2 imm2, ForwardingOperandReg(killed) + if (IsForwardingOperandKilled) + fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg); + + LLVM_DEBUG(dbgs() << "With:\n"); + LLVM_DEBUG(MI.dump()); + return true; + } + return false; +} + +bool PPCInstrInfo::transformToNewImmFormFedByAdd( + MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const { + MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); + bool PostRA = !MRI->isSSA(); + // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI + // for post-ra. + if (PostRA) + return false; + + // Only handle load/store. + if (!MI.mayLoadOrStore()) + return false; + + unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode()); + + assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) && + "MI must have x-form opcode"); + + // get Imm Form info. + ImmInstrInfo III; + bool IsVFReg = MI.getOperand(0).isReg() + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + + if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA)) + return false; + + if (!III.IsSummingOperands) + return false; + + if (OpNoForForwarding != III.OpNoForForwarding) + return false; + + MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo); + if (!ImmOperandMI.isImm()) + return false; + + // Check DefMI. + MachineOperand *ImmMO = nullptr; + MachineOperand *RegMO = nullptr; + if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) + return false; + assert(ImmMO && RegMO && "Imm and Reg operand must have been set"); + + // Check Imm. + // Set ImmBase from imm instruction as base and get new Imm inside + // isImmElgibleForForwarding. + int64_t ImmBase = ImmOperandMI.getImm(); + int64_t Imm = 0; + if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase)) + return false; + + // Get killed info in case fixup needed after transformation. + unsigned ForwardKilledOperandReg = ~0U; + if (MI.getOperand(III.OpNoForForwarding).isKill()) + ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg(); + + // Do the transform + LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Fed by:\n"); + LLVM_DEBUG(DefMI.dump()); + + MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg()); + MI.getOperand(III.OpNoForForwarding).setIsKill(RegMO->isKill()); + MI.getOperand(III.ImmOpNo).setImm(Imm); + + // FIXME: fix kill/dead flag if MI and DefMI are not in same basic block. + if (DefMI.getParent() == MI.getParent()) { + // Check if reg is killed between MI and DefMI. 
+ auto IsKilledFor = [&](unsigned Reg) { + MachineBasicBlock::const_reverse_iterator It = MI; + MachineBasicBlock::const_reverse_iterator E = DefMI; + It++; + for (; It != E; ++It) { + if (It->killsRegister(Reg)) + return true; + } + return false; + }; + + // Update kill flag + if (RegMO->isKill() || IsKilledFor(RegMO->getReg())) + fixupIsDeadOrKill(DefMI, MI, RegMO->getReg()); + if (ForwardKilledOperandReg != ~0U) + fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg); + } + + LLVM_DEBUG(dbgs() << "With:\n"); + LLVM_DEBUG(MI.dump()); + return true; +} + // If an X-Form instruction is fed by an add-immediate and one of its operands // is the literal zero, attempt to forward the source of the add-immediate to // the corresponding D-Form instruction with the displacement coming from @@ -3718,8 +4070,15 @@ bool PPCInstrInfo::transformToImmFormFedByAdd( bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, unsigned ConstantOpNo, - MachineInstr &DefMI, - int64_t Imm) const { + MachineInstr &DefMI) const { + // DefMI must be LI or LI8. + if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) || + !DefMI.getOperand(1).isImm()) + return false; + + // Get Imm operand and Sign-extend to 64-bits. + int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm()); + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); // Exit early if we can't convert this. |
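As a closing cross-check of the rotate-and-mask folding performed in simplifyToLI above: the switch to APInt::getBitsSetWithWrap covers RLWINM masks with MB > ME (wrap-around), and the RLDICL path now special-cases MB == 0, where the old (1LLU << 64) - 1 expression was undefined behavior. A minimal standalone recomputation of the same mask arithmetic (plain C++, arbitrary values; PPC bit numbering, bit 0 is the MSB):

#include <cstdint>
#include <cstdio>

// Rotate a 32-bit value left by SH in [0, 31].
static uint32_t Rotl32(uint32_t V, unsigned SH) {
  return SH == 0 ? V : (V << SH) | (V >> (32 - SH));
}

// Build the RLWINM mask selecting PPC bits MB..ME inclusive; when MB > ME the
// mask wraps around, which is the case APInt::getBitsSetWithWrap handles.
static uint32_t RlwinmMask(unsigned MB, unsigned ME) {
  uint32_t Hi = 0xFFFFFFFFu >> MB;        // PPC bits MB..31 set
  uint32_t Lo = 0xFFFFFFFFu << (31 - ME); // PPC bits 0..ME set
  return MB <= ME ? (Hi & Lo) : (Hi | Lo);
}

int main() {
  // Folding "li 0x00F0" into "rlwinm SH=8, MB=16, ME=23": rotate, then mask.
  uint32_t Folded = Rotl32(0x00F0u, 8) & RlwinmMask(16, 23); // 0xF000
  // A wrap-around mask (MB=28 > ME=3) keeps only the top and bottom nibbles.
  uint32_t Wrapped = RlwinmMask(28, 3);                      // 0xF000000F
  std::printf("0x%X 0x%X\n", Folded, Wrapped);
  return (Folded == 0xF000u && Wrapped == 0xF000000Fu) ? 0 : 1;
}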