author     Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2018-07-28 10:51:19 +0000
commit     eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree       44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/AArch64/AArch64FrameLowering.cpp
parent     b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 313
1 file changed, 250 insertions(+), 63 deletions(-)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index d66f7b59a4b5..6dc5d19862a9 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -140,8 +140,19 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
cl::desc("enable use of redzone on AArch64"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ReverseCSRRestoreSeq("reverse-csr-restore-seq",
+ cl::desc("reverse the CSR restore sequence"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+/// This is the biggest offset to the stack pointer we can encode in aarch64
+/// instructions (without using a separate calculation and a temp register).
+/// Note that the exceptions here are vector stores/loads, which cannot encode
+/// any displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
+static const unsigned DefaultSafeSPDisplacement = 255;
+
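Illustrative sketch (not part of the patch): the 255-byte limit corresponds to the most restrictive general-purpose load/store form, the 9-bit signed unscaled immediate reaching -256..255 from SP, assuming no scratch register has been set up.

    #include <cstdint>

    // Hypothetical helper mirroring what DefaultSafeSPDisplacement stands for:
    // SP-relative offsets outside this range need a separate calculation into a
    // temporary register before the access.
    static bool reachableWithoutScratchReg(int64_t SPOffset) {
      return SPOffset >= -256 && SPOffset <= 255;
    }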
/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
@@ -151,7 +162,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
// realistically that's not a big deal at this stage of the game.
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- if (MI.isDebugValue() || MI.isPseudo() ||
+ if (MI.isDebugInstr() || MI.isPseudo() ||
MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::ADDSXri)
continue;
@@ -167,7 +178,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
}
}
}
- return 255;
+ return DefaultSafeSPDisplacement;
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -191,11 +202,25 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// Retain behavior of always omitting the FP for leaf functions when possible.
- return (MFI.hasCalls() &&
- MF.getTarget().Options.DisableFramePointerElim(MF)) ||
- MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
- MFI.hasStackMap() || MFI.hasPatchPoint() ||
- RegInfo->needsStackRealignment(MF);
+ if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
+ return true;
+ if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+ MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ RegInfo->needsStackRealignment(MF))
+ return true;
+ // With large call frames around, we may need to use FP to access the scavenging
+ // emergency spill slot.
+ //
+ // Unfortunately some calls to hasFP() like machine verifier ->
+ // getReservedReg() -> hasFP in the middle of global isel are too early
+ // to know the max call frame size. Hopefully conservatively returning "true"
+ // in those cases is fine.
+ // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
+ if (!MFI.isMaxCallFrameSizeComputed() ||
+ MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
+ return true;
+
+ return false;
}
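A distilled sketch of the new call-frame condition (helper name hypothetical, not the patch's code): the frame pointer is also forced when the maximum call frame size is not yet known or exceeds DefaultSafeSPDisplacement, keeping the register scavenger's emergency spill slot addressable.

    // Only general-purpose registers are emergency-spilled, so the 255-byte
    // displacement (DefaultSafeSPDisplacement) is a sufficient threshold.
    static bool needsFPForLargeCallFrames(bool MaxCallFrameSizeComputed,
                                          unsigned MaxCallFrameSize) {
      return !MaxCallFrameSizeComputed || MaxCallFrameSize > 255;
    }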
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -349,7 +374,8 @@ static bool windowsRequiresStackProbe(MachineFunction &MF,
F.getFnAttribute("stack-probe-size")
.getValueAsString()
.getAsInteger(0, StackProbeSize);
- return StackSizeInBytes >= StackProbeSize;
+ return (StackSizeInBytes >= StackProbeSize) &&
+ !F.hasFnAttribute("no-stack-arg-probe");
}
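For illustration (assumed usage, not part of the patch): both attributes read by this predicate are ordinary string function attributes and can be set through the Function API; the 8192-byte value below is made up.

    #include "llvm/IR/Function.h"

    void tuneStackProbing(llvm::Function &F) {
      F.addFnAttr("stack-probe-size", "8192"); // only probe frames of 8 KiB or more
      F.addFnAttr("no-stack-arg-probe");       // or opt out of stack probing entirely
    }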
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
@@ -388,6 +414,14 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+ // Ignore instructions that do not operate on SP, i.e. shadow call stack
+ // instructions.
+ while (MBBI->getOpcode() == AArch64::STRXpost ||
+ MBBI->getOpcode() == AArch64::LDRXpre) {
+ assert(MBBI->getOperand(0).getReg() != AArch64::SP);
+ ++MBBI;
+ }
+
unsigned NewOpc;
bool NewIsUnscaled = false;
switch (MBBI->getOpcode()) {
@@ -455,6 +489,14 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
unsigned LocalStackSize) {
unsigned Opc = MI.getOpcode();
+
+ // Ignore instructions that do not operate on SP, i.e. shadow call stack
+ // instructions.
+ if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
+ assert(MI.getOperand(0).getReg() != AArch64::SP);
+ return;
+ }
+
(void)Opc;
assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
@@ -472,6 +514,38 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}
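Worked example of the fix-up above: X-register STP/LDP immediates are scaled by 8 bytes, so a 64-byte local area turns stp x20, x19, [sp, #16] into stp x20, x19, [sp, #80], i.e. the MI immediate goes from 2 to 10. A minimal sketch of that arithmetic, assuming the 8-byte scaling asserted above:

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper: bump a scaled CSR save/restore immediate after the
    // local stack allocation is folded below the callee-save area.
    static int64_t bumpScaledCSROffset(int64_t ScaledImm, unsigned LocalStackSize) {
      assert(LocalStackSize % 8 == 0 && "stack bump must keep 8-byte scaling");
      return ScaledImm + LocalStackSize / 8;
    }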
+static void adaptForLdStOpt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator FirstSPPopI,
+ MachineBasicBlock::iterator LastPopI) {
+ // Sometimes (when we restore in the same order as we save), we can end up
+ // with code like this:
+ //
+ // ldp x26, x25, [sp]
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // add sp, sp, #64
+ //
+ // In this case, it is always better to put the first ldp at the end, so
+ // that the load-store optimizer can run and merge the ldp and the add into
+ // a post-index ldp.
+ // If we managed to grab the first pop instruction, move it to the end.
+ if (ReverseCSRRestoreSeq)
+ MBB.splice(FirstSPPopI, &MBB, LastPopI);
+ // We should end up with something like this now:
+ //
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // ldp x26, x25, [sp]
+ // add sp, sp, #64
+ //
+ // and the load-store optimizer can merge the last two instructions into:
+ //
+ // ldp x26, x25, [sp], #64
+ //
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -485,6 +559,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
bool HasFP = hasFP(MF);
+ // At this point, we're going to decide whether or not the function uses a
+ // redzone. In most cases, the function doesn't have a redzone so let's
+ // assume that's false and set it to true in the case that there's a redzone.
+ AFI->setHasRedZone(false);
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
@@ -505,9 +584,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
- if (canUseRedZone(MF))
+ if (canUseRedZone(MF)) {
+ AFI->setHasRedZone(true);
++NumRedZoneFunctions;
- else {
+ } else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
@@ -823,14 +903,32 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ // Assume we can't combine the last pop with the sp restore.
- if (!CombineSPBump && PrologueSaveSize != 0)
- convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
+ if (!CombineSPBump && PrologueSaveSize != 0) {
+ MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+ // Converting the last ldp to a post-index ldp is valid only if the last
+ // ldp's offset is 0.
+ const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
+ // If the offset is 0, convert it to a post-index ldp.
+ if (OffsetOp.getImm() == 0) {
+ convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
+ PrologueSaveSize);
+ } else {
+ // If not, make sure to emit an add after the last ldp.
+ // We're doing this by transferring the size to be restored from the
+ // adjustment *before* the CSR pops to the adjustment *after* the CSR
+ // pops.
+ AfterCSRPopSize += PrologueSaveSize;
+ }
+ }
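A minimal sketch of this decision (names hypothetical, not the patch's code): a trailing CSR pop at offset 0 can absorb the SP restore as a post-index ldp, otherwise the restore amount is deferred to a separate add emitted after the pops.

    #include <cstdint>

    struct EpilogueRestorePlan {
      bool ConvertLastPopToPostIndex;
      uint64_t AfterCSRPopSize;
    };

    static EpilogueRestorePlan planCSRRestore(int64_t LastPopOffset,
                                              uint64_t PrologueSaveSize,
                                              uint64_t ArgumentPopSize) {
      if (LastPopOffset == 0)                       // e.g. ldp x26, x25, [sp], #N
        return {true, ArgumentPopSize};
      return {false, ArgumentPopSize + PrologueSaveSize}; // keep ldp, add sp later
    }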
// Move past the restores of the callee-saved registers.
+ // If we plan on combining the sp bump of the local stack size and the callee
+ // save stack size, we might need to adjust the CSR save and restore offsets.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
@@ -845,7 +943,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + ArgumentPopSize, TII,
+ NumBytes + AfterCSRPopSize, TII,
MachineInstr::FrameDestroy);
return;
}
@@ -857,19 +955,27 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
// stack pointer (but we may need to pop stack args for fastcc).
- if (RedZone && ArgumentPopSize == 0)
+ if (RedZone && AfterCSRPopSize == 0)
return;
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
- StackRestoreBytes += ArgumentPopSize;
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+ StackRestoreBytes += AfterCSRPopSize;
+
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
- if (NoCalleeSaveRestore || ArgumentPopSize == 0)
+ bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
+
+ // If we're done after this, make sure to help the load store optimizer.
+ if (Done)
+ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
+
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+ if (Done)
return;
+
NumBytes = 0;
}
@@ -888,9 +994,24 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
- if (ArgumentPopSize)
- emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- ArgumentPopSize, TII, MachineInstr::FrameDestroy);
+ if (AfterCSRPopSize) {
+ // Find an insertion point for the first ldp so that it goes before the
+ // shadow call stack epilog instruction. This ensures that the restore of
+ // lr from x18 is placed after the restore from sp.
+ auto FirstSPPopI = MBB.getFirstTerminator();
+ while (FirstSPPopI != Begin) {
+ auto Prev = std::prev(FirstSPPopI);
+ if (Prev->getOpcode() != AArch64::LDRXpre ||
+ Prev->getOperand(0).getReg() == AArch64::SP)
+ break;
+ FirstSPPopI = Prev;
+ }
+
+ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
+
+ emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
+ AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+ }
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -917,6 +1038,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
bool isFixed = MFI.isFixedObjectIndex(FI);
+ bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
+ -((int)AFI->getCalleeSavedStackSize());
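Illustrative arithmetic for the new isCSR test (helper hypothetical): callee-saved slots occupy object offsets in [-CalleeSavedStackSize, 0), so with a 32-byte CSR area an object at offset -16 is a CSR slot while one at -48 is an ordinary local; fixed objects are excluded before this check.

    // Sketch only, assuming the offset convention used above.
    static bool isCSRSlot(int ObjectOffset, unsigned CalleeSavedStackSize) {
      return ObjectOffset >= -(int)CalleeSavedStackSize;
    }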
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -930,26 +1053,48 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
- !RegInfo->needsStackRealignment(MF)) {
- // Use SP or FP, whichever gives us the best chance of the offset
- // being in range for direct access. If the FPOffset is positive,
- // that'll always be best, as the SP will be even further away.
+ } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
+ // References to the CSR area must use FP if we're re-aligning the stack
+ // since the dynamically-sized alignment padding is between the SP/BP and
+ // the CSR area.
+ assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
+ UseFP = true;
+ } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
// If the FPOffset is negative, we have to keep in mind that the
// available offset range for negative offsets is smaller than for
- // positive ones. If we have variable sized objects, we're stuck with
- // using the FP regardless, though, as the SP offset is unknown
- // and we don't have a base pointer available. If an offset is
+ // positive ones. If an offset is
// available via the FP and the SP, use whichever is closest.
- if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
- (FPOffset >= -256 && Offset > -FPOffset))
+ bool FPOffsetFits = FPOffset >= -256;
+ PreferFP |= Offset > -FPOffset;
+
+ if (MFI.hasVarSizedObjects()) {
+ // If we have variable sized objects, we can use either FP or BP, as the
+ // SP offset is unknown. We can use the base pointer if we have one and
+ // FP is not preferred. If not, we're stuck with using FP.
+ bool CanUseBP = RegInfo->hasBasePointer(MF);
+ if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
+ UseFP = PreferFP;
+ else if (!CanUseBP) // Can't use BP. Forced to use FP.
+ UseFP = true;
+ // else we can use BP and FP, but the offset from FP won't fit.
+ // That will make us scavenge registers which we can probably avoid by
+ // using BP. If it won't fit for BP either, we'll scavenge anyway.
+ } else if (FPOffset >= 0) {
+ // Use SP or FP, whichever gives us the best chance of the offset
+ // being in range for direct access. If the FPOffset is positive,
+ // that'll always be best, as the SP will be even further away.
UseFP = true;
+ } else {
+ // We have the choice between FP and (SP or BP).
+ if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
+ UseFP = true;
+ }
}
}
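A distilled sketch of the variable-sized-objects branch above (helper name hypothetical): with the SP offset unknown, the choice reduces to FP versus the base pointer.

    static bool useFPWithVarSizedObjects(bool FPOffsetFits, bool PreferFP,
                                         bool CanUseBP) {
      if (FPOffsetFits && CanUseBP)
        return PreferFP; // both reachable: take the preferred (closer) one
      return !CanUseBP;  // no base pointer: forced onto FP even if the offset
                         // does not fit and a register must be scavenged
    }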
- assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
"In the presence of dynamic stack pointer realignment, "
- "non-argument objects cannot be accessed through the frame pointer");
+ "non-argument/CSR objects cannot be accessed through the frame pointer");
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
@@ -960,6 +1105,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
if (RegInfo->hasBasePointer(MF))
FrameReg = RegInfo->getBaseRegister();
else {
+ assert(!MFI.hasVarSizedObjects() &&
+ "Can't use SP when we have var sized objects.");
FrameReg = AArch64::SP;
// If we're using the red zone for this function, the SP won't actually
// be adjusted, so the offsets will be negative. They're also all
@@ -1007,7 +1154,8 @@ struct RegPairInfo {
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
+ const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
+ bool &NeedShadowCallStackProlog) {
if (CSI.empty())
return;
@@ -1041,6 +1189,15 @@ static void computeCalleeSaveRegisterPairs(
RPI.Reg2 = NextReg;
}
+ // If either of the registers to be saved is the lr register, it means that
+ // we also need to save lr in the shadow call stack.
+ if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
+ MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
+ if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ report_fatal_error("Must reserve x18 to use shadow call stack");
+ NeedShadowCallStackProlog = true;
+ }
+
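For illustration (assumed usage, not part of the patch): the check keys on the shadowcallstack function attribute, which a front end or pass can set through the Function API; x18 must additionally be reserved (e.g. via the reserve-x18 subtarget feature / -ffixed-x18), otherwise the fatal error above fires.

    #include "llvm/IR/Function.h"

    void markForShadowCallStack(llvm::Function &F) {
      F.addFnAttr(llvm::Attribute::ShadowCallStack);
    }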
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
@@ -1091,9 +1248,24 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+ bool NeedShadowCallStackProlog = false;
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
+ NeedShadowCallStackProlog);
const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (NeedShadowCallStackProlog) {
+ // Shadow call stack prolog: str x30, [x18], #8
+ BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::X18)
+ .addImm(8)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // This instruction also makes x18 live-in to the entry block.
+ MBB.addLiveIn(AArch64::X18);
+ }
+
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
@@ -1115,13 +1287,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
else
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
- DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
- if (RPI.isPaired())
- dbgs() << ", " << printReg(Reg2, TRI);
- dbgs() << ") -> fi#(" << RPI.FrameIdx;
- if (RPI.isPaired())
- dbgs() << ", " << RPI.FrameIdx+1;
- dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
+ if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
+ dbgs() << ")\n");
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
@@ -1157,11 +1327,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end())
DL = MI->getDebugLoc();
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+ bool NeedShadowCallStackProlog = false;
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
+ NeedShadowCallStackProlog);
- for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
- ++RPII) {
- RegPairInfo RPI = *RPII;
+ auto EmitMI = [&](const RegPairInfo &RPI) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
@@ -1178,13 +1348,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
else
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
- DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
- if (RPI.isPaired())
- dbgs() << ", " << printReg(Reg2, TRI);
- dbgs() << ") -> fi#(" << RPI.FrameIdx;
- if (RPI.isPaired())
- dbgs() << ", " << RPI.FrameIdx+1;
- dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
+ if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
+ dbgs() << ")\n");
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
@@ -1200,7 +1368,25 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
MachineMemOperand::MOLoad, 8, 8));
+ };
+
+ if (ReverseCSRRestoreSeq)
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ EmitMI(RPI);
+ else
+ for (const RegPairInfo &RPI : RegPairs)
+ EmitMI(RPI);
+
+ if (NeedShadowCallStackProlog) {
+ // Shadow call stack epilog: ldr x30, [x18, #-8]!
+ BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::X18)
+ .addImm(-8)
+ .setMIFlag(MachineInstr::FrameDestroy);
}
+
return true;
}
@@ -1275,10 +1461,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
- DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
- for (unsigned Reg : SavedRegs.set_bits())
- dbgs() << ' ' << printReg(Reg, RegInfo);
- dbgs() << "\n";);
+ LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+ for (unsigned Reg
+ : SavedRegs.set_bits()) dbgs()
+ << ' ' << printReg(Reg, RegInfo);
+ dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumRegsSpilled = SavedRegs.count();
@@ -1287,7 +1474,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
bool BigStack = (CFSize > EstimatedStackSizeLimit);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@@ -1301,8 +1488,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// here.
if (BigStack) {
if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
- DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
- << " to get a scratch register.\n");
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
+ << " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
@@ -1322,8 +1509,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned Align = TRI->getSpillAlignment(RC);
int FI = MFI.CreateStackObject(Size, Align, false);
RS->addScavengingFrameIndex(FI);
- DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
- << " as the emergency spill slot.\n");
+ LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
+ << " as the emergency spill slot.\n");
}
}