summaryrefslogtreecommitdiff
path: root/lib/Target/AArch64/AArch64FrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp895
1 files changed, 535 insertions, 360 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3f63d049c34ed..82111e5c72593 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -93,6 +93,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,12 +128,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
- // Note: currently hasFP() is always true for hasCalls(), but that's an
- // implementation detail of the current code, not a strict requirement,
- // so stay safe here and check both.
- if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
- return false;
- return true;
+ return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
}
/// hasFP - Return true if the specified function should have a dedicated frame
@@ -140,9 +136,12 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
+ // Retain behavior of always omitting the FP for leaf functions when possible.
+ return (MFI->hasCalls() &&
+ MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ RegInfo->needsStackRealignment(MF);
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -155,7 +154,7 @@ AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-void AArch64FrameLowering::eliminateCallFramePseudoInstr(
+MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
@@ -170,7 +169,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
unsigned Align = getStackAlignment();
int64_t Amount = I->getOperand(0).getImm();
- Amount = RoundUpToAlignment(Amount, Align);
+ Amount = alignTo(Amount, Align);
if (!IsDestroy)
Amount = -Amount;
@@ -186,7 +185,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
// 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
// LSL #0, and the other uses LSL #12.
//
- // Mostly call frames will be allocated at the start of a function so
+ // Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
@@ -198,12 +197,11 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
TII);
}
- MBB.erase(I);
+ return MBB.erase(I);
}
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- unsigned FramePtr) const {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
@@ -216,75 +214,194 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout &TD = MF.getDataLayout();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD.getPointerSize(0);
-
- // Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
- unsigned TotalSkipped = 0;
for (const auto &Info : CSI) {
unsigned Reg = Info.getReg();
- int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea() + saveAreaOffset;
-
- // Don't output a new CFI directive if we're re-saving the frame pointer or
- // link register. This happens when the PrologEpilogInserter has inserted an
- // extra "STP" of the frame pointer and link register -- the "emitPrologue"
- // method automatically generates the directives when frame pointers are
- // used. If we generate CFI directives for the extra "STP"s, the linker will
- // lose track of the correct values for the frame pointer and link register.
- if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
- TotalSkipped += stackGrowth;
- continue;
- }
-
+ int64_t Offset =
+ MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, DwarfReg, Offset - TotalSkipped));
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
-/// Get FPOffset by analyzing the first instruction.
-static int getFPOffsetInPrologue(MachineInstr *MBBI) {
- // First instruction must a) allocate the stack and b) have an immediate
- // that is a multiple of -2.
- assert(((MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre) &&
- MBBI->getOperand(3).getReg() == AArch64::SP &&
- MBBI->getOperand(4).getImm() < 0 &&
- (MBBI->getOperand(4).getImm() & 1) == 0));
-
- // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
- // required for the callee saved register area we get the frame pointer
- // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
- assert(FPOffset >= 0 && "Bad Framepointer Offset");
- return FPOffset;
-}
+// Find a scratch register that we can use at the start of the prologue to
+// re-align the stack pointer. We avoid using callee-save registers since they
+// may appear to be free when this is called from canUseAsPrologue (during
+// shrink wrapping), but then no longer be free when this is called from
+// emitPrologue.
+//
+// FIXME: This is a bit conservative, since in the above case we could use one
+// of the callee-save registers as a scratch temp to re-align the stack pointer,
+// but we would then have to make sure that we were in fact saving at least one
+// callee-save register in the prologue, which is additional complexity that
+// doesn't seem worth the benefit.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+ MachineFunction *MF = MBB->getParent();
+
+ // If MBB is an entry block, use X9 as the scratch register
+ if (&MF->front() == MBB)
+ return AArch64::X9;
+
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ LivePhysRegs LiveRegs(&TRI);
+ LiveRegs.addLiveIns(*MBB);
+
+ // Mark callee saved registers as used so we will not choose them.
+ const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ LiveRegs.addReg(CSRegs[i]);
+
+ // Prefer X9 since it was historically used for the prologue scratch reg.
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (LiveRegs.available(MRI, AArch64::X9))
+ return AArch64::X9;
-static bool isCSSave(MachineInstr *MBBI) {
- return MBBI->getOpcode() == AArch64::STPXi ||
- MBBI->getOpcode() == AArch64::STPDi ||
- MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre;
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (LiveRegs.available(MRI, Reg))
+ return Reg;
+ }
+ return AArch64::NoRegister;
}
bool AArch64FrameLowering::canUseAsPrologue(
const MachineBasicBlock &MBB) const {
const MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Don't need a scratch register if we're not going to re-align the stack.
- // Otherwise, we may need a scratch register to be available and we do not
- // support that for now.
- return !RegInfo->needsStackRealignment(*MF);
+ if (!RegInfo->needsStackRealignment(*MF))
+ return true;
+ // Otherwise, we can use any block as long as it has a scratch register
+ // available.
+ return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
+}
+
+bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
+ MachineFunction &MF, unsigned StackBumpBytes) const {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ if (AFI->getLocalStackSize() == 0)
+ return false;
+
+ // 512 is the maximum immediate for stp/ldp that will be used for
+ // callee-save save/restores
+ if (StackBumpBytes >= 512)
+ return false;
+
+ if (MFI->hasVarSizedObjects())
+ return false;
+
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // This isn't strictly necessary, but it simplifies things a bit since the
+ // current RedZone handling code assumes the SP is adjusted by the
+ // callee-save save/restore code.
+ if (canUseRedZone(MF))
+ return false;
+
+ return true;
+}
+
+// Convert callee-save register save/restore instruction to do stack pointer
+// decrement/increment to allocate/deallocate the callee-save stack area by
+// converting store/load to use pre/post increment version.
+static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+
+ unsigned NewOpc;
+ bool NewIsUnscaled = false;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ case AArch64::STPXi:
+ NewOpc = AArch64::STPXpre;
+ break;
+ case AArch64::STPDi:
+ NewOpc = AArch64::STPDpre;
+ break;
+ case AArch64::STRXui:
+ NewOpc = AArch64::STRXpre;
+ NewIsUnscaled = true;
+ break;
+ case AArch64::STRDui:
+ NewOpc = AArch64::STRDpre;
+ NewIsUnscaled = true;
+ break;
+ case AArch64::LDPXi:
+ NewOpc = AArch64::LDPXpost;
+ break;
+ case AArch64::LDPDi:
+ NewOpc = AArch64::LDPDpost;
+ break;
+ case AArch64::LDRXui:
+ NewOpc = AArch64::LDRXpost;
+ NewIsUnscaled = true;
+ break;
+ case AArch64::LDRDui:
+ NewOpc = AArch64::LDRDpost;
+ NewIsUnscaled = true;
+ break;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ MIB.addReg(AArch64::SP, RegState::Define);
+
+ // Copy all operands other than the immediate offset.
+ unsigned OpndIdx = 0;
+ for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
+ ++OpndIdx)
+ MIB.addOperand(MBBI->getOperand(OpndIdx));
+
+ assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
+ "Unexpected immediate offset in first/last callee-save save/restore "
+ "instruction!");
+ assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
+ "Unexpected base register in callee-save save/restore instruction!");
+ // Last operand is immediate offset that needs fixing.
+ assert(CSStackSizeInc % 8 == 0);
+ int64_t CSStackSizeIncImm = CSStackSizeInc;
+ if (!NewIsUnscaled)
+ CSStackSizeIncImm /= 8;
+ MIB.addImm(CSStackSizeIncImm);
+
+ MIB.setMIFlags(MBBI->getFlags());
+ MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
+
+ return std::prev(MBB.erase(MBBI));
+}
+
+// Fixup callee-save register save/restore instructions to take into account
+// combined SP bump by adding the local stack size to the stack offsets.
+static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+ unsigned LocalStackSize) {
+ unsigned Opc = MI.getOpcode();
+ (void)Opc;
+ assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
+ Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
+ Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
+ Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
+ "Unexpected callee-save save/restore opcode!");
+
+ unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+ assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
+ "Unexpected base register in callee-save save/restore instruction!");
+ // Last operand is immediate offset that needs fixing.
+ MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+ // All generated opcodes have scaled offsets.
+ assert(LocalStackSize % 8 == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
@@ -316,40 +433,59 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
-
+ if (!NumBytes)
+ return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
- if (NumBytes && !canUseRedZone(MF)) {
+ if (canUseRedZone(MF))
+ ++NumRedZoneFunctions;
+ else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
- } else if (NumBytes) {
- ++NumRedZoneFunctions;
}
-
return;
}
- // Only set up FP if we actually need to.
- int FPOffset = 0;
- if (HasFP)
- FPOffset = getFPOffsetInPrologue(MBBI);
+ auto CSStackSize = AFI->getCalleeSavedStackSize();
+ // All of the remaining stack allocations are for locals.
+ AFI->setLocalStackSize(NumBytes - CSStackSize);
- // Move past the saves of the callee-saved registers.
- while (isCSSave(MBBI)) {
- ++MBBI;
- NumBytes -= 16;
+ bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ if (CombineSPBump) {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+ NumBytes = 0;
+ } else if (CSStackSize != 0) {
+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
+ -CSStackSize);
+ NumBytes -= CSStackSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+ // Move past the saves of the callee-saved registers, fixing up the offsets
+ // and pre-inc if we decided to combine the callee-save and local stack
+ // pointer bump above.
+ MachineBasicBlock::iterator End = MBB.end();
+ while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
+ if (CombineSPBump)
+ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+ ++MBBI;
+ }
if (HasFP) {
+ // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
+ int FPOffset = CSStackSize - 16;
+ if (CombineSPBump)
+ FPOffset += AFI->getLocalStackSize();
+
// Issue sub fp, sp, FPOffset or
// mov fp,sp when FPOffset is zero.
// Note: All stores of callee-saved registers are marked as "FrameSetup".
@@ -358,47 +494,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
}
- // All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes);
-
// Allocate space for the rest of the frame.
+ if (NumBytes) {
+ const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
+ unsigned scratchSPReg = AArch64::SP;
- const unsigned Alignment = MFI->getMaxAlignment();
- const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
- unsigned scratchSPReg = AArch64::SP;
- if (NumBytes && NeedsRealignment) {
- // Use the first callee-saved register as a scratch register.
- scratchSPReg = AArch64::X9;
- }
+ if (NeedsRealignment) {
+ scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(scratchSPReg != AArch64::NoRegister);
+ }
- // If we're a leaf function, try using the red zone.
- if (NumBytes && !canUseRedZone(MF))
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ // If we're a leaf function, try using the red zone.
+ if (!canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
- if (NumBytes && NeedsRealignment) {
- const unsigned NrBitsToZero = countTrailingZeros(Alignment);
- assert(NrBitsToZero > 1);
- assert(scratchSPReg != AArch64::SP);
-
- // SUB X9, SP, NumBytes
- // -- X9 is temporary register, so shouldn't contain any live data here,
- // -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- // The logical immediates have a non-trivial encoding. The following
- // formula computes the encoded immediate with all ones but
- // NrBitsToZero zero bits as least significant bits.
- uint32_t andMaskEncoded =
- (1 <<12) // = N
- | ((64-NrBitsToZero) << 6) // immr
- | ((64-NrBitsToZero-1) << 0) // imms
- ;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(andMaskEncoded);
+ if (NeedsRealignment) {
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is temporary register, so shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded = (1 << 12) // = N
+ | ((64 - NrBitsToZero) << 6) // immr
+ | ((64 - NrBitsToZero - 1) << 0); // imms
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
+ AFI->setStackRealigned(true);
+ }
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -491,21 +626,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
-
- // Record the location of the stored LR
- unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
-
- // Record the location of the stored FP
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
@@ -515,36 +635,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
}
- // Now emit the moves for whatever callee saved regs we have.
- emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
- }
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-/// Checks whether the given instruction restores callee save registers
-/// and if so returns how many.
-static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) {
- unsigned RtIdx = 0;
- switch (MI.getOpcode()) {
- case AArch64::LDPXpost:
- case AArch64::LDPDpost:
- RtIdx = 1;
- // FALLTHROUGH
- case AArch64::LDPXi:
- case AArch64::LDPDi:
- if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) ||
- MI.getOperand(RtIdx + 2).getReg() != AArch64::SP)
- return 0;
- return 2;
+ // Now emit the moves for whatever callee saved regs we have (including FP,
+ // LR if those are saved).
+ emitCalleeSavedFrameMoves(MBB, MBBI);
}
- return 0;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -552,7 +646,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
@@ -599,7 +692,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// ---------------------| --- |
// | | | |
// | CalleeSavedReg | | |
- // | (NumRestores * 8) | | |
+ // | (CalleeSavedStackSize)| | |
// | | | |
// ---------------------| | NumBytes
// | | StackSize (StackAdjustUp)
@@ -614,41 +707,74 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
//
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
- NumBytes += ArgumentPopSize;
- unsigned NumRestores = 0;
+ auto CSStackSize = AFI->getCalleeSavedStackSize();
+ bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+
+ if (!CombineSPBump && CSStackSize != 0)
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
+
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
- unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
- NumRestores += Restores;
- if (Restores == 0) {
+ if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
++LastPopI;
break;
- }
+ } else if (CombineSPBump)
+ fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+ }
+
+ // If there is a single SP update, insert it before the ret and we're done.
+ if (CombineSPBump) {
+ emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+ NumBytes + ArgumentPopSize, TII,
+ MachineInstr::FrameDestroy);
+ return;
}
- NumBytes -= NumRestores * 8;
+
+ NumBytes -= CSStackSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (!hasFP(MF)) {
+ bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
- // stack pointer.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
- TII);
- return;
+ // stack pointer (but we may need to pop stack args for fastcc).
+ if (RedZone && ArgumentPopSize == 0)
+ return;
+
+ bool NoCalleeSaveRestore = CSStackSize == 0;
+ int StackRestoreBytes = RedZone ? 0 : NumBytes;
+ if (NoCalleeSaveRestore)
+ StackRestoreBytes += ArgumentPopSize;
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+ // If we were able to combine the local stack pop with the argument pop,
+ // then we're done.
+ if (NoCalleeSaveRestore || ArgumentPopSize == 0)
+ return;
+ NumBytes = 0;
}
// Restore the original stack pointer.
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
+ if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
+ -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
+ else if (NumBytes)
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
+ MachineInstr::FrameDestroy);
+
+ // This must be placed after the callee-save restore code because that code
+ // assumes the SP is at the same location as it was after the callee-save save
+ // code in the prologue.
+ if (ArgumentPopSize)
+ emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+ ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -726,86 +852,167 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
- if (Reg != AArch64::LR)
- return getKillRegState(true);
+ // Do not set a kill flag on values that are also marked as live-in. This
+ // happens with the @llvm-returnaddress intrinsic and with arguments passed in
+ // callee saved registers.
+ // Omitting the kill flags is conservatively correct even if the live-in
+ // is not used after all.
+ bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
+ return getKillRegState(!IsLiveIn);
+}
- // LR maybe referred to later by an @llvm.returnaddress intrinsic.
- bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
- bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
- return getKillRegState(LRKill);
+static bool produceCompactUnwindFrame(MachineFunction &MF) {
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ AttributeSet Attrs = MF.getFunction()->getAttributes();
+ return Subtarget.isTargetMachO() &&
+ !(Subtarget.getTargetLowering()->supportSwiftError() &&
+ Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
-bool AArch64FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+struct RegPairInfo {
+ RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
+ unsigned Reg1;
+ unsigned Reg2;
+ int FrameIdx;
+ int Offset;
+ bool IsGPR;
+ bool isPaired() const { return Reg2 != AArch64::NoRegister; }
+};
+
+static void computeCalleeSaveRegisterPairs(
+ MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
+
+ if (CSI.empty())
+ return;
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
unsigned Count = CSI.size();
- DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ (void)CC;
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ assert((!produceCompactUnwindFrame(MF) ||
+ CC == CallingConv::PreserveMost ||
+ (Count & 1) == 0) &&
+ "Odd number of callee-saved regs to spill!");
+ unsigned Offset = AFI->getCalleeSavedStackSize();
+
+ for (unsigned i = 0; i < Count; ++i) {
+ RegPairInfo RPI;
+ RPI.Reg1 = CSI[i].getReg();
+
+ assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
+ AArch64::FPR64RegClass.contains(RPI.Reg1));
+ RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+
+ // Add the next reg to the pair if it is in the same register class.
+ if (i + 1 < Count) {
+ unsigned NextReg = CSI[i + 1].getReg();
+ if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
+ (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
+ RPI.Reg2 = NextReg;
+ }
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned idx = Count - i - 2;
- unsigned Reg1 = CSI[idx].getReg();
- unsigned Reg2 = CSI[idx + 1].getReg();
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+ assert((!RPI.isPaired() ||
+ (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
+
+ // MachO's compact unwind format relies on all registers being stored in
+ // adjacent register pairs.
+ assert((!produceCompactUnwindFrame(MF) ||
+ CC == CallingConv::PreserveMost ||
+ (RPI.isPaired() &&
+ ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
+ RPI.Reg1 + 1 == RPI.Reg2))) &&
+ "Callee-save registers not saved as adjacent register pair!");
+
+ RPI.FrameIdx = CSI[i].getFrameIdx();
+
+ if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ Offset -= 16;
+ assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
+ MFI->setObjectAlignment(RPI.FrameIdx, 16);
+ AFI->setCalleeSaveStackHasFreeSpace(true);
+ } else
+ Offset -= RPI.isPaired() ? 16 : 8;
+ assert(Offset % 8 == 0);
+ RPI.Offset = Offset / 8;
+ assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
+ "Offset out of bounds for LDP/STP immediate");
+
+ RegPairs.push_back(RPI);
+ if (RPI.isPaired())
+ ++i;
+ }
+}
+
+bool AArch64FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ SmallVector<RegPairInfo, 8> RegPairs;
+
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+
+ for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
+ ++RPII) {
+ RegPairInfo RPI = *RPII;
+ unsigned Reg1 = RPI.Reg1;
+ unsigned Reg2 = RPI.Reg2;
unsigned StrOpc;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- // Issue sequence of non-sp increment and pi sp spills for cs regs. The
- // first spill is a pre-increment that allocates the stack.
+
+ // Issue sequence of spills for cs regs. The first spill may be converted
+ // to a pre-decrement store later by emitPrologue if the callee-save stack
+ // area allocation can't be combined with the local stack area allocation.
// For example:
- // stp x22, x21, [sp, #-48]! // addImm(-6)
+ // stp x22, x21, [sp, #0] // addImm(+0)
// stp x20, x19, [sp, #16] // addImm(+2)
// stp fp, lr, [sp, #32] // addImm(+4)
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
- // Note: Similar rational and sequence for restores in epilog.
- if (AArch64::GPR64RegClass.contains(Reg1)) {
- assert(AArch64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = AArch64::STPXpre;
- else
- StrOpc = AArch64::STPXi;
- } else if (AArch64::FPR64RegClass.contains(Reg1)) {
- assert(AArch64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = AArch64::STPDpre;
- else
- StrOpc = AArch64::STPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
- << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
- // Compute offset: i = 0 => offset = -Count;
- // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
- const int Offset = (i == 0) ? -Count : i;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for STP immediate");
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
- if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
- MIB.addReg(AArch64::SP, RegState::Define);
+ // Note: Similar rationale and sequence for restores in epilog.
+ if (RPI.IsGPR)
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+ else
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
MBB.addLiveIn(Reg1);
- MBB.addLiveIn(Reg2);
- MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
- .addReg(Reg1, getPrologueDeath(MF, Reg1))
+ if (RPI.isPaired()) {
+ MBB.addLiveIn(Reg2);
+ MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
+ MachineMemOperand::MOStore, 8, 8));
+ }
+ MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
- .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
.setMIFlag(MachineInstr::FrameSetup);
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+ MachineMemOperand::MOStore, 8, 8));
}
return true;
}
@@ -816,66 +1023,55 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- unsigned Count = CSI.size();
DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ SmallVector<RegPairInfo, 8> RegPairs;
if (MI != MBB.end())
DL = MI->getDebugLoc();
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned Reg1 = CSI[i].getReg();
- unsigned Reg2 = CSI[i + 1].getReg();
- // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
- // list to come in sorted by frame index so that we can issue the store
- // pair instructions directly. Assert if we see anything otherwise.
- assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
- "Out of order callee saved regs!");
- // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
- // the last load is sp-pi post-increment and de-allocates the stack:
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+
+ for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
+ ++RPII) {
+ RegPairInfo RPI = *RPII;
+ unsigned Reg1 = RPI.Reg1;
+ unsigned Reg2 = RPI.Reg2;
+
+ // Issue sequence of restores for cs regs. The last restore may be converted
+ // to a post-increment load later by emitEpilogue if the callee-save stack
+ // area allocation can't be combined with the local stack area allocation.
// For example:
// ldp fp, lr, [sp, #32] // addImm(+4)
// ldp x20, x19, [sp, #16] // addImm(+2)
- // ldp x22, x21, [sp], #48 // addImm(+6)
+ // ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
+ if (RPI.IsGPR)
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+ else
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- if (AArch64::GPR64RegClass.contains(Reg1)) {
- assert(AArch64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = AArch64::LDPXpost;
- else
- LdrOpc = AArch64::LDPXi;
- } else if (AArch64::FPR64RegClass.contains(Reg1)) {
- assert(AArch64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = AArch64::LDPDpost;
- else
- LdrOpc = AArch64::LDPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
- << ", " << CSI[i + 1].getFrameIdx() << ")\n");
-
- // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
- // etc.
- const int Offset = (i == Count - 2) ? Count : Count - i - 2;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for LDP immediate");
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
- if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
- MIB.addReg(AArch64::SP, RegState::Define);
-
- MIB.addReg(Reg2, getDefRegState(true))
- .addReg(Reg1, getDefRegState(true))
+ if (RPI.isPaired()) {
+ MIB.addReg(Reg2, getDefRegState(true));
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
+ MachineMemOperand::MOLoad, 8, 8));
+ }
+ MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
- .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
- // where the factor * 8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+ .setMIFlag(MachineInstr::FrameDestroy);
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+ MachineMemOperand::MOLoad, 8, 8));
}
return true;
}
@@ -892,8 +1088,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- SmallVector<unsigned, 4> UnspilledCSGPRs;
- SmallVector<unsigned, 4> UnspilledCSFPRs;
+ unsigned UnspilledCSGPR = AArch64::NoRegister;
+ unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
@@ -901,79 +1097,51 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::LR);
}
- // Spill the BasePtr if it's used. Do this first thing so that the
- // getCalleeSavedRegs() below will get the right answer.
+ unsigned BasePointerReg = AArch64::NoRegister;
if (RegInfo->hasBasePointer(MF))
- SavedRegs.set(RegInfo->getBaseRegister());
-
- if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
- SavedRegs.set(AArch64::X9);
+ BasePointerReg = RegInfo->getBaseRegister();
- // If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumGPRSpilled = 0;
- unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
- bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ // Figure out which callee-saved registers to save/restore.
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ const unsigned Reg = CSRegs[i];
+
+ // Add the base pointer register to SavedRegs if it is callee-save.
+ if (Reg == BasePointerReg)
+ SavedRegs.set(Reg);
- // Check pairs of consecutive callee-saved registers.
- for (unsigned i = 0; CSRegs[i]; i += 2) {
- assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
- const unsigned OddReg = CSRegs[i];
- const unsigned EvenReg = CSRegs[i + 1];
- assert((AArch64::GPR64RegClass.contains(OddReg) &&
- AArch64::GPR64RegClass.contains(EvenReg)) ^
- (AArch64::FPR64RegClass.contains(OddReg) &&
- AArch64::FPR64RegClass.contains(EvenReg)) &&
- "Register class mismatch!");
-
- const bool OddRegUsed = SavedRegs.test(OddReg);
- const bool EvenRegUsed = SavedRegs.test(EvenReg);
-
- // Early exit if none of the registers in the register pair is actually
- // used.
- if (!OddRegUsed && !EvenRegUsed) {
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- UnspilledCSGPRs.push_back(OddReg);
- UnspilledCSGPRs.push_back(EvenReg);
- } else {
- UnspilledCSFPRs.push_back(OddReg);
- UnspilledCSFPRs.push_back(EvenReg);
+ bool RegUsed = SavedRegs.test(Reg);
+ unsigned PairedReg = CSRegs[i ^ 1];
+ if (!RegUsed) {
+ if (AArch64::GPR64RegClass.contains(Reg) &&
+ !RegInfo->isReservedReg(MF, Reg)) {
+ UnspilledCSGPR = Reg;
+ UnspilledCSGPRPaired = PairedReg;
}
continue;
}
- unsigned Reg = AArch64::NoRegister;
- // If only one of the registers of the register pair is used, make sure to
- // mark the other one as used as well.
- if (OddRegUsed ^ EvenRegUsed) {
- // Find out which register is the additional spill.
- Reg = OddRegUsed ? EvenReg : OddReg;
- SavedRegs.set(Reg);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ // FIXME: the usual format is actually better if unwinding isn't needed.
+ if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ SavedRegs.set(PairedReg);
+ if (AArch64::GPR64RegClass.contains(PairedReg) &&
+ !RegInfo->isReservedReg(MF, PairedReg))
+ ExtraCSSpill = true;
}
+ }
- DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
- DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
- assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
- (RegInfo->getEncodingValue(OddReg) + 1 ==
- RegInfo->getEncodingValue(EvenReg))) &&
- "Register pair of non-adjacent registers!");
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- NumGPRSpilled += 2;
- // If it's not a reserved register, we can use it in lieu of an
- // emergency spill slot for the register scavenger.
- // FIXME: It would be better to instead keep looking and choose another
- // unspilled register that isn't reserved, if there is one.
- if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- } else
- NumFPRSpilled += 2;
+ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ dbgs() << ' ' << PrintReg(Reg, RegInfo);
+ dbgs() << "\n";);
- CanEliminateFrame = false;
- }
+ // If any callee-saved registers are used, the frame cannot be eliminated.
+ unsigned NumRegsSpilled = SavedRegs.count();
+ bool CanEliminateFrame = NumRegsSpilled == 0;
// FIXME: Set BigStack if any stack slot references may be out of range.
// For now, just conservatively guestimate based on unscaled indexing
@@ -982,8 +1150,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize =
- MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@@ -996,19 +1163,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack && !ExtraCSSpill) {
-
- // If we're adding a register to spill here, we have to add two of them
- // to keep the number of regs to spill even.
- assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
- unsigned Count = 0;
- while (!UnspilledCSGPRs.empty() && Count < 2) {
- unsigned Reg = UnspilledCSGPRs.back();
- UnspilledCSGPRs.pop_back();
- DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
- << " to get a scratch register.\n");
- SavedRegs.set(Reg);
+ if (UnspilledCSGPR != AArch64::NoRegister) {
+ DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
+ << " to get a scratch register.\n");
+ SavedRegs.set(UnspilledCSGPR);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs, so if we need to spill one extra for BigStack, then we need to
+ // store the pair.
+ if (produceCompactUnwindFrame(MF))
+ SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = true;
- ++Count;
+ NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1021,4 +1186,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
<< " as the emergency spill slot.\n");
}
}
+
+ // Round up to register pair alignment to avoid additional SP adjustment
+ // instructions.
+ AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+}
+
+bool AArch64FrameLowering::enableStackSlotScavenging(
+ const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return AFI->hasCalleeSaveStackFreeSpace();
}