aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp942
1 files changed, 554 insertions, 388 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 4c608520e265..bd9174c1973d 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -31,6 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+STATISTIC(NumPrologProbed, "Number of prologues probed");
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
@@ -47,7 +49,7 @@ static const MCPhysReg VRRegNo[] = {
};
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI() || STI.isAIXABI())
+ if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
// SVR4 ABI:
return STI.isPPC64() ? 16 : 4;
@@ -60,20 +62,12 @@ static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
- // For the Darwin ABI:
- // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
- // for saving the frame pointer (if needed.) While the published ABI has
- // not used this slot since at least MacOSX 10.2, there is older code
- // around that does use it, and that needs to continue to work.
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -8U : -4U;
-
- // SVR4 ABI: First slot in the general register save area.
+ // First slot in the general register save area.
return STI.isPPC64() ? -8U : -4U;
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
- if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
+ if (STI.isAIXABI() || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
// 32-bit SVR4 ABI:
@@ -81,18 +75,16 @@ static unsigned computeLinkageSize(const PPCSubtarget &STI) {
}
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -16U : -8U;
+ // Third slot in the general purpose register save area.
+ if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
+ return -12U;
- // SVR4 ABI: First slot in the general register save area.
- return STI.isPPC64()
- ? -16U
- : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
+ // Second slot in the general purpose register save area.
+ return STI.isPPC64() ? -16U : -8U;
}
-static unsigned computeCRSaveOffset() {
- // The condition register save offset needs to be updated for AIX PPC32.
- return 8;
+static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
+ return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
@@ -103,71 +95,97 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
LinkageSize(computeLinkageSize(Subtarget)),
BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
- CRSaveOffset(computeCRSaveOffset()) {}
+ CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
unsigned &NumEntries) const {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
+// Floating-point register save area offsets.
+#define CALLEE_SAVED_FPRS \
+ {PPC::F31, -8}, \
+ {PPC::F30, -16}, \
+ {PPC::F29, -24}, \
+ {PPC::F28, -32}, \
+ {PPC::F27, -40}, \
+ {PPC::F26, -48}, \
+ {PPC::F25, -56}, \
+ {PPC::F24, -64}, \
+ {PPC::F23, -72}, \
+ {PPC::F22, -80}, \
+ {PPC::F21, -88}, \
+ {PPC::F20, -96}, \
+ {PPC::F19, -104}, \
+ {PPC::F18, -112}, \
+ {PPC::F17, -120}, \
+ {PPC::F16, -128}, \
+ {PPC::F15, -136}, \
+ {PPC::F14, -144}
+
+// 32-bit general purpose register save area offsets shared by ELF and
+// AIX. AIX has an extra CSR with r13.
+#define CALLEE_SAVED_GPRS32 \
+ {PPC::R31, -4}, \
+ {PPC::R30, -8}, \
+ {PPC::R29, -12}, \
+ {PPC::R28, -16}, \
+ {PPC::R27, -20}, \
+ {PPC::R26, -24}, \
+ {PPC::R25, -28}, \
+ {PPC::R24, -32}, \
+ {PPC::R23, -36}, \
+ {PPC::R22, -40}, \
+ {PPC::R21, -44}, \
+ {PPC::R20, -48}, \
+ {PPC::R19, -52}, \
+ {PPC::R18, -56}, \
+ {PPC::R17, -60}, \
+ {PPC::R16, -64}, \
+ {PPC::R15, -68}, \
+ {PPC::R14, -72}
+
+// 64-bit general purpose register save area offsets.
+#define CALLEE_SAVED_GPRS64 \
+ {PPC::X31, -8}, \
+ {PPC::X30, -16}, \
+ {PPC::X29, -24}, \
+ {PPC::X28, -32}, \
+ {PPC::X27, -40}, \
+ {PPC::X26, -48}, \
+ {PPC::X25, -56}, \
+ {PPC::X24, -64}, \
+ {PPC::X23, -72}, \
+ {PPC::X22, -80}, \
+ {PPC::X21, -88}, \
+ {PPC::X20, -96}, \
+ {PPC::X19, -104}, \
+ {PPC::X18, -112}, \
+ {PPC::X17, -120}, \
+ {PPC::X16, -128}, \
+ {PPC::X15, -136}, \
+ {PPC::X14, -144}
+
+// Vector register save area offsets.
+#define CALLEE_SAVED_VRS \
+ {PPC::V31, -16}, \
+ {PPC::V30, -32}, \
+ {PPC::V29, -48}, \
+ {PPC::V28, -64}, \
+ {PPC::V27, -80}, \
+ {PPC::V26, -96}, \
+ {PPC::V25, -112}, \
+ {PPC::V24, -128}, \
+ {PPC::V23, -144}, \
+ {PPC::V22, -160}, \
+ {PPC::V21, -176}, \
+ {PPC::V20, -192}
// Note that the offsets here overlap, but this is fixed up in
// processFunctionBeforeFrameFinalized.
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
+ static const SpillSlot ELFOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
// CR save area offset. We map each of the nonvolatile CR fields
// to the slot for CR2, which is the first of the nonvolatile CR
@@ -178,19 +196,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192},
+ CALLEE_SAVED_VRS,
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
@@ -212,73 +218,48 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::S15, -136},
{PPC::S14, -144}};
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
+ static const SpillSlot ELFOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
+ CALLEE_SAVED_VRS
+ };
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}};
+ static const SpillSlot AIXOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ // TODO: Update when we add vector support for AIX.
+ };
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
+ static const SpillSlot AIXOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
+ // TODO: Update when we add vector support for AIX.
+ };
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
+ if (Subtarget.is64BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets64);
+ return ELFOffsets64;
+ }
- return Offsets;
+ if (Subtarget.is32BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets32);
+ return ELFOffsets32;
}
+
+ assert(Subtarget.isAIXABI() && "Unexpected ABI.");
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(AIXOffsets64);
+ return AIXOffsets64;
+ }
+
+ NumEntries = array_lengthof(AIXOffsets32);
+ return AIXOffsets32;
}
/// RemoveVRSaveCode - We have found that this function does not need any code
@@ -479,9 +460,9 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
- unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
- unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
- unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+ Align TargetAlign = getStackAlign(); // alignment required per the ABI
+ Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame
+ Align Alignment = std::max(TargetAlign, MaxAlign);
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -513,7 +494,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI.hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+ maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
// Update the new max call frame size if the caller passes in a valid pointer.
if (NewMaxCallFrameSize)
@@ -523,7 +504,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
FrameSize += maxCallFrameSize;
// Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ FrameSize = alignTo(FrameSize, Alignment);
return FrameSize;
}
@@ -613,11 +594,11 @@ bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired,
- unsigned *SR1,
- unsigned *SR2) const {
+ Register *SR1,
+ Register *SR2) const {
RegScavenger RS;
- unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
- unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
+ Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+ Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
// Set the defaults for the two scratch registers.
if (SR1)
@@ -684,7 +665,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
if (SecondScratchReg != -1)
*SR2 = SecondScratchReg;
else
- *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
+ *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
}
// Now that we've done our best to provide both registers, double check
@@ -709,7 +690,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
@@ -778,11 +759,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsCFI = MF.needsFrameMoves();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -790,8 +773,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
- "Unsupported PPC ABI.");
+ assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
@@ -821,20 +803,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
bool MustSaveTOC = FI->mustSaveTOC();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = isPPC64 || !isSVR4ABI;
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
- unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
: PPC::MFLR );
@@ -854,6 +836,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
: PPC::SUBFC);
const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
: PPC::SUBFIC);
+ const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
+ : PPC::MFCR);
+ const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
// Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
// LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
@@ -863,9 +848,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- bool SingleScratchReg =
- findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
- &ScratchReg, &TempReg);
+ // Stack probe requires two scratch registers, one for old sp, one for large
+ // frame and large probe size.
+ bool SingleScratchReg = findScratchRegister(
+ &MBB, false,
+ twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
+ &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -906,21 +894,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
// Get stack alignments.
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
if (HasBP && MaxAlign > 1)
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
// Frames of 32KB & larger require special handling because they cannot be
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
bool isLargeFrame = !isInt<16>(NegFrameSize);
- assert((isPPC64 || !MustSaveCR) &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveCR && isAIXABI)
- report_fatal_error("Prologue CR saving is unimplemented on AIX.");
-
// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
@@ -960,49 +941,42 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // If we need to spill the CR and the LR but we don't have two separate
- // registers available, we must spill them one at a time
- if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ // Where in the prologue we move the CR fields depends on how many scratch
+ // registers we have, and if we need to save the link register or not. This
+ // lambda is to avoid duplicating the logic in 2 places.
+ auto BuildMoveFromCR = [&]() {
+ if (isELFv2ABI && MustSaveCRs.size() == 1) {
// In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
+ // If only one CR field is clobbered, it is more efficient to use
+ // mfocrf to selectively save just that field, because mfocrf has short
// latency compares to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
+ assert(isPPC64 && "V2 ABI is 64-bit only.");
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
+ MIB.addReg(MustSaveCRs[0], RegState::Kill);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
+ for (unsigned CRfield : MustSaveCRs)
+ MIB.addReg(CRfield, RegState::ImplicitKill);
}
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
- .addReg(SPReg);
+ };
+
+ // If we need to spill the CR and the LR but we don't have two separate
+ // registers available, we must spill them one at a time
+ if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ BuildMoveFromCR();
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(CRSaveOffset)
+ .addReg(SPReg);
}
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
- if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
- // In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
- // latency compares to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
- }
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- }
+ if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
+ BuildMoveFromCR();
if (HasRedZone) {
if (HasFP)
@@ -1029,11 +1003,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg);
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR)) {
assert(HasRedZone && "A red zone is always available on PPC64");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
.addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
+ .addImm(CRSaveOffset)
.addReg(SPReg);
}
@@ -1055,58 +1029,81 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// the negated frame size will be placed in ScratchReg.
bool HasSTUX = false;
- // This condition must be kept in sync with canUseAsPrologue.
- if (HasBP && MaxAlign > 1) {
- if (isPPC64)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- else // PPC32...
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(32 - Log2_32(MaxAlign))
- .addImm(31);
- if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
+ // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
+ // pointer is always stored at SP, we will get a free probe due to an essential
+ // STU(X) instruction.
+ if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
+ // To be consistent with other targets, a pseudo instruction is emitted and
+ // will be later expanded in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, dl,
+ TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
+ : PPC::PROBED_STACKALLOC_32))
+ .addDef(ScratchReg)
+ .addDef(TempReg) // TempReg stores the old sp.
.addImm(NegFrameSize);
- } else {
- assert(!SingleScratchReg && "Only a single scratch reg available");
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
- .addReg(TempReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(TempReg, RegState::Kill);
+ // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
+ // update the ScratchReg to meet the assumption that ScratchReg contains
+ // the NegFrameSize. This solution is rather tricky.
+ if (!HasRedZone) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
+ .addReg(TempReg)
+ .addReg(SPReg);
+ HasSTUX = true;
}
+ } else {
+ // This condition must be kept in sync with canUseAsPrologue.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ assert(!SingleScratchReg && "Only a single scratch reg available");
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
- } else if (!isLargeFrame) {
- BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
- .addReg(SPReg)
- .addImm(NegFrameSize)
- .addReg(SPReg);
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
+ }
}
// Save the TOC register after the stack pointer update if a prologue TOC
@@ -1247,7 +1244,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Adjust the definition of CFA to account for the change in SP.
assert(NegFrameSize);
CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
}
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1337,7 +1334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -1367,6 +1364,175 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ // TODO: Generate CFI instructions.
+ bool isPPC64 = Subtarget.isPPC64();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
+ auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
+ });
+ if (StackAllocMIPos == PrologMBB.end())
+ return;
+ const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
+ MachineInstr &MI = *StackAllocMIPos;
+ int64_t NegFrameSize = MI.getOperand(2).getImm();
+ int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
+ int64_t NumBlocks = NegFrameSize / NegProbeSize;
+ int64_t NegResidualSize = NegFrameSize % NegProbeSize;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register ScratchReg = MI.getOperand(0).getReg();
+ Register FPReg = MI.getOperand(1).getReg();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ Align MaxAlign = MFI.getMaxAlign();
+ // Initialize current frame pointer.
+ const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
+ BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Subroutines to generate .cfi_* directives.
+ auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ auto buildDefCFA = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg,
+ int Offset) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ // Subroutine to determine if we can use the Imm as part of d-form.
+ auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
+ // Subroutine to materialize the Imm into TempReg.
+ auto MaterializeImm = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t Imm,
+ Register &TempReg) {
+ assert(isInt<32>(Imm) && "Unhandled imm");
+ if (isInt<16>(Imm))
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
+ .addImm(Imm);
+ else {
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(Imm >> 16);
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
+ .addReg(TempReg)
+ .addImm(Imm & 0xFFFF);
+ }
+ };
+ // Subroutine to store frame pointer and decrease stack pointer by probe size.
+ auto allocateAndProbe = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t NegSize,
+ Register NegSizeReg, bool UseDForm) {
+ if (UseDForm)
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
+ .addReg(FPReg)
+ .addImm(NegSize)
+ .addReg(SPReg);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(NegSizeReg);
+ };
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(PrologMBB, {MI}, FPReg, 0);
+ // For case HasBP && MaxAlign > 1, we have to align the SP by performing
+ // SP = SP - SP % MaxAlign.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
+ SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(PrologMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and current shrinkwrap implementation
+ // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
+ // CTR loop to probe.
+ // Calculate trip count and stores it in CTRReg.
+ MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(PrologMBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ PrologMBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
+ PrologMBB.addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+ }
+ ++NumPrologProbed;
+ MI.eraseFromParent();
+}
+
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
@@ -1392,18 +1558,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
: PPC::MTLR );
const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
@@ -1418,7 +1584,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
-
+ const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
+ : PPC::LWZ);
+ const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
+ : PPC::MTOCRF);
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@@ -1593,20 +1762,17 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// value (although not the base register). Make sure it is not overwritten
// too early.
- assert((isPPC64 || !MustSaveCR) &&
- "Epilogue CR restoring supported only in 64-bit mode");
-
// If we need to restore both the LR and the CR and we only have one
// available scratch register, we must do them one at a time.
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
// Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
// is live here.
assert(HasRedZone && "Expecting red zone");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
}
@@ -1623,11 +1789,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
- // This will only occur for PPC64.
- assert(isPPC64 && "Expecting 64-bit mode");
assert(RBReg == SPReg && "Should be using SP as a base register");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(RBReg);
}
@@ -1682,9 +1846,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR))
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
@@ -1729,13 +1893,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
DebugLoc dl = MBBI->getDebugLoc();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
- // Create branch instruction for pseudo tail call return instruction
+ // Create branch instruction for pseudo tail call return instruction.
+ // The TCRETURNdi variants are direct calls. Valid targets for those are
+ // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
+ // since we can tail call external functions with PC-Rel (i.e. we don't need
+ // to worry about different TOC pointers). Some of the external functions will
+ // be MO_GlobalAddress while others like memcpy for example, are going to
+ // be MO_ExternalSymbol.
unsigned RetOpcode = MBBI->getOpcode();
if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1747,8 +1923,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
} else if (RetOpcode == PPC::TCRETURNdi8) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1776,7 +1958,6 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
const bool isPPC64 = Subtarget.isPPC64();
- const bool IsDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1823,25 +2004,26 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
- // function uses CR 2, 3, or 4.
- if (!isPPC64 && !IsDarwinABI &&
- (SavedRegs.test(PPC::CR2) ||
- SavedRegs.test(PPC::CR3) ||
+ // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
+ // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
+ // object at the offset of the CR-save slot in the linkage area. The actual
+ // save and restore of the condition register will be created as part of the
+ // prologue and epilogue insertion, but the FixedStack object is needed to
+ // keep the CalleSavedInfo valid.
+ if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
- int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
+ const int64_t SpillOffset =
+ Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
+ int FrameIdx =
+ MFI.CreateFixedObject(SpillSize, SpillOffset,
+ /* IsImmutable */ true, /* IsAliased */ false);
FI->setCRSpillFrameIndex(FrameIdx);
}
}
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- addScavengingSpillSlot(MF, RS);
- return;
- }
-
// Get callee saved register information.
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -2014,11 +2196,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
std::min<unsigned>(TRI->getEncodingValue(MinGPR),
TRI->getEncodingValue(MinG8R));
- if (Subtarget.isPPC64()) {
- LowerBound -= (31 - MinReg + 1) * 8;
- } else {
- LowerBound -= (31 - MinReg + 1) * 4;
- }
+ const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
+ LowerBound -= (31 - MinReg + 1) * GPRegSize;
}
// For 32-bit only, the CR save area is below the general register
@@ -2026,19 +2205,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// to the stack pointer and hence does not need an adjustment here.
// Only CR2 (the first nonvolatile spilled) has an associated frame
// index so that we have a single uniform save area.
- if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
// Adjust the frame index of the CR spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
- // Leave Darwin logic as-is.
- || (!Subtarget.isSVR4ABI() &&
- (PPC::CRBITRCRegClass.contains(Reg) ||
- PPC::CRRCRegClass.contains(Reg)))) {
- int FI = CSI[i].getFrameIdx();
-
+ for (const auto &CSInfo : CSI) {
+ if (CSInfo.getReg() == PPC::CR2) {
+ int FI = CSInfo.getFrameIdx();
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ break;
}
}
@@ -2108,17 +2281,17 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
unsigned Size = TRI.getSpillSize(RC);
- unsigned Align = TRI.getSpillAlignment(RC);
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ Align Alignment = TRI.getSpillAlign(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
// Might we have over-aligned allocas?
- bool HasAlVars = MFI.hasVarSizedObjects() &&
- MFI.getMaxAlignment() > getStackAlignment();
+ bool HasAlVars =
+ MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
-
+ RS->addScavengingFrameIndex(
+ MFI.CreateStackObject(Size, Alignment, false));
}
}
@@ -2179,17 +2352,9 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
return AllSpilledToReg;
}
-
-bool
-PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+bool PPCFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -2201,10 +2366,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
// CR2 through CR4 are the nonvolatile CR fields.
@@ -2232,7 +2395,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
- if (Subtarget.isPPC64()) {
+ if (!Subtarget.is32BitELFABI()) {
// The actual spill will happen at the start of the prologue.
FuncInfo->addMustSaveCR(Reg);
} else {
@@ -2260,37 +2423,37 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Use !IsLiveIn for the kill flag.
// We do not want to kill registers that are live in this function
// before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ else
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ RC, TRI);
}
}
}
return true;
}
-static void
-restoreCRs(bool isPPC64, bool is31,
- bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
+ bool CR4Spilled, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DebugLoc DL;
- unsigned RestoreOp, MoveReg;
+ unsigned MoveReg = PPC::R12;
- if (isPPC64)
- // This is handled during epilogue generation.
- return;
- else {
- // 32-bit: FP-relative
- MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
- PPC::R12),
- CSI[CSIIndex].getFrameIdx()));
- RestoreOp = PPC::MTOCRF;
- MoveReg = PPC::R12;
- }
+ // 32-bit: FP-relative
+ MBB.insert(MI,
+ addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
+ CSI[CSIIndex].getFrameIdx()));
+ unsigned RestoreOp = PPC::MTOCRF;
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
@@ -2343,17 +2506,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return MBB.erase(I);
}
-bool
-PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+static bool isCalleeSavedCR(unsigned Reg) {
+ return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
+}
+bool PPCFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
@@ -2374,15 +2533,18 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
+ // Restore of callee saved condition register field is handled during
+ // epilogue insertion.
+ if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
@@ -2396,14 +2558,12 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR4Spilled = true;
continue;
} else {
- // When we first encounter a non-CR register after seeing at
+ // On 32-bit ELF when we first encounter a non-CR register after seeing at
// least one CR register, restore all spilled CRs together.
- if ((CR2Spilled || CR3Spilled || CR4Spilled)
- && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31,
- CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
+ CSIIndex);
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
@@ -2415,7 +2575,16 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
} else {
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
+ TRI);
+ else
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
assert(I != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
}
@@ -2432,9 +2601,10 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// If we haven't yet spilled the CRs, do so now.
if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ assert(Subtarget.is32BitELFABI() &&
+ "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
}
return true;
@@ -2445,14 +2615,10 @@ unsigned PPCFrameLowering::getTOCSaveOffset() const {
}
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("FramePointer is not implemented on AIX yet.");
return FramePointerSaveOffset;
}
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("BasePointer is not implemented on AIX yet.");
return BasePointerSaveOffset;
}