author    Dimitry Andric <dim@FreeBSD.org>    2021-07-29 20:15:26 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2021-07-29 20:15:26 +0000
commit    344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree      f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 353
1 file changed, 290 insertions, 63 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 65ee5016042c..f6a528c0e6fd 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -47,8 +47,9 @@
// | callee-saved gpr registers        | <--.
// |                                   |    | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
-// |                                   |    | (frame record first)
-// | prev_fp, prev_lr                  | <--'
+// | prev_lr                           |    | (frame record first)
+// | prev_fp                           | <--'
+// | async context if needed           |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
@@ -107,8 +108,14 @@
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
+// Outgoing function arguments must be at the bottom of the stack frame when
+// calling another function. If we do not have variable-sized stack objects, we
+// can allocate a "reserved call frame" area at the bottom of the local
+// variable area, large enough for all outgoing calls. If we do have VLAs, then
+// the stack pointer must be decremented and incremented around each call to
+// make space for the arguments below the VLAs.
+//
// FIXME: also explain the redzone concept.
-// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//
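As a concrete companion to the reserved-call-frame paragraph above, a purely illustrative C++ sketch of the two situations (VLAs are a Clang extension in C++; callee, fixedLocals and withVLA are hypothetical names, not part of the patch):

// Nine integer arguments: under AAPCS64 the ninth goes on the stack, so
// every call site needs an outgoing-argument area.
extern void callee(int, int, int, int, int, int, int, int, int);

void fixedLocals() {
  // No variable-sized objects: the outgoing-argument space can be folded
  // into the prologue's single SP decrement (the "reserved call frame").
  callee(0, 1, 2, 3, 4, 5, 6, 7, 8);
}

void withVLA(unsigned n) {
  int vla[n]; // variable-sized stack object (Clang extension in C++)
  vla[0] = 1;
  // With the VLA sitting between SP and the locals, SP must instead be
  // decremented before this call and re-incremented after it to make
  // argument space below the VLA.
  callee(vla[0], 1, 2, 3, 4, 5, 6, 7, 8);
}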
@@ -179,11 +186,21 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+cl::opt<bool> EnableHomogeneousPrologEpilog(
+ "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
+ cl::desc("Emit homogeneous prologue and epilogue for the size "
+ "optimization (default = off)"));
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-/// Returns the argument pop size.
-static uint64_t getArgumentPopSize(MachineFunction &MF,
- MachineBasicBlock &MBB) {
+/// Returns how much of the incoming argument stack area (in bytes) we should
+/// clean up in an epilogue. For the C calling convention this will be 0, for
+/// guaranteed tail call conventions it can be positive (a normal return or a
+/// tail call to a function that uses less stack space for arguments) or
+/// negative (for a tail call to a function that needs more stack space than us
+/// for arguments).
+static int64_t getArgumentStackToRestore(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
bool IsTailCallReturn = false;
if (MBB.end() != MBBI) {
@@ -194,7 +211,7 @@ static uint64_t getArgumentPopSize(MachineFunction &MF,
}
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- uint64_t ArgumentPopSize = 0;
+ int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
@@ -213,6 +230,47 @@ static uint64_t getArgumentPopSize(MachineFunction &MF,
return ArgumentPopSize;
}
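A worked-arithmetic illustration of the new sign convention (a hypothetical helper for intuition, not code from this patch):

// For guaranteed tail calls, the epilogue may have to shrink or grow the
// incoming-argument area before the jump:
//   passed 16 bytes, tail callee needs 8   ->  +8 (pop the surplus)
//   passed 8 bytes,  tail callee needs 16  ->  -8 (allocate the shortfall)
int64_t exampleStackToRestore(int64_t bytesPassedToUs,
                              int64_t bytesTailCalleeNeeds) {
  return bytesPassedToUs - bytesTailCalleeNeeds;
}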
+static bool produceCompactUnwindFrame(MachineFunction &MF);
+static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getSVEStackSize(const MachineFunction &MF);
+
+/// Returns true if homogeneous prolog or epilog code can be emitted
+/// for the size optimization. If possible, a frame helper call is injected.
+/// When an Exit block is given, this check is for the epilog.
+bool AArch64FrameLowering::homogeneousPrologEpilog(
+ MachineFunction &MF, MachineBasicBlock *Exit) const {
+ if (!MF.getFunction().hasMinSize())
+ return false;
+ if (!EnableHomogeneousPrologEpilog)
+ return false;
+ if (ReverseCSRRestoreSeq)
+ return false;
+ if (EnableRedZone)
+ return false;
+
+ // TODO: Windows is not supported yet.
+ if (needsWinCFI(MF))
+ return false;
+ // TODO: SVE is not supported yet.
+ if (getSVEStackSize(MF))
+ return false;
+
+ // Bail on stack adjustment needed on return for simplicity.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
+ return false;
+ if (Exit && getArgumentStackToRestore(MF, *Exit))
+ return false;
+
+ return true;
+}
+
+/// Returns true if CSRs should be paired.
+bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
+ return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
+}
+
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
@@ -258,10 +316,10 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) {
if (!IsWin64 || IsFunclet) {
- // Only Win64 uses fixed objects, and then only for the function (not
- // funclets)
- return 0;
+ return AFI->getTailCallReservedStack();
} else {
+ if (AFI->getTailCallReservedStack() != 0)
+ report_fatal_error("cannot generate ABI-changing tail call for Win64");
// Var args are stored here in the primary function.
const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
// To support EH funclets we allocate an UnwindHelp object
@@ -279,16 +337,20 @@ static StackOffset getSVEStackSize(const MachineFunction &MF) {
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
+
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const unsigned RedZoneSize =
+ Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
+ if (!RedZoneSize)
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t NumBytes = AFI->getLocalStackSize();
- return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
+ return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
getSVEStackSize(MF));
}
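To see what the red zone buys at the source level, a hedged example (assuming the hidden aarch64-redzone option backing EnableRedZone is on, and a target whose red-zone size covers the locals):

// A leaf: no calls, no frame pointer, and only 64 bytes of locals, so
// canUseRedZone() can return true and the locals are addressed at
// negative offsets from an unmoved SP.
int sumSixteen(const int *p) {
  int tmp[16]; // 64 bytes, under the default 128-byte red zone
  for (int i = 0; i < 16; ++i)
    tmp[i] = p[i] * 2;
  int s = 0;
  for (int i = 0; i < 16; ++i)
    s += tmp[i];
  return s;
}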
@@ -307,7 +369,7 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
return true;
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
- RegInfo->needsStackRealignment(MF))
+ RegInfo->hasStackRealignment(MF))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -560,7 +622,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Don't need a scratch register if we're not going to re-align the stack.
- if (!RegInfo->needsStackRealignment(*MF))
+ if (!RegInfo->hasStackRealignment(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
@@ -596,6 +658,8 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ if (homogeneousPrologEpilog(MF))
+ return false;
if (AFI->getLocalStackSize() == 0)
return false;
@@ -620,7 +684,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (MFI.hasVarSizedObjects())
return false;
- if (RegInfo->needsStackRealignment(MF))
+ if (RegInfo->hasStackRealignment(MF))
return false;
// This isn't strictly necessary, but it simplifies things a bit since the
@@ -828,21 +892,17 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
++MBBI;
}
unsigned NewOpc;
- int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
- Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
- Scale = 8;
break;
case AArch64::STPQi:
NewOpc = AArch64::STPQpre;
- Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
@@ -855,15 +915,12 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
- Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
- Scale = 8;
break;
case AArch64::LDPQi:
NewOpc = AArch64::LDPQpost;
- Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
@@ -882,6 +939,25 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
+ TypeSize Scale = TypeSize::Fixed(1);
+ unsigned Width;
+ int64_t MinOffset, MaxOffset;
+ bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
+ NewOpc, Scale, Width, MinOffset, MaxOffset);
+ (void)Success;
+ assert(Success && "unknown load/store opcode");
+
+ // If the first store isn't right where we want SP then we can't fold the
+ // update in so create a normal arithmetic instruction instead.
+ if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
+ CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(CSStackSizeInc), TII,
+ InProlog ? MachineInstr::FrameSetup
+ : MachineInstr::FrameDestroy);
+ return std::prev(MBBI);
+ }
+
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
@@ -897,7 +973,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
assert(CSStackSizeInc % Scale == 0);
- MIB.addImm(CSStackSizeInc / Scale);
+ MIB.addImm(CSStackSizeInc / (int)Scale);
MIB.setMIFlags(MBBI->getFlags());
MIB.setMemRefs(MBBI->memoperands());
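The conversion and its new bail-out, sketched below. The range used is the architectural signed imm7 scaled by 8 for X-register STP; this is a mirror for intuition, not the exact getMemOpInfo query:

// Before:                        After (SP update folded into the store):
//   sub sp, sp, #16                stp x29, x30, [sp, #-16]!
//   stp x29, x30, [sp]
bool canFoldIntoPreIndexedStpX(int64_t CSStackSizeInc) {
  const int64_t Scale = 8; // X-register pairs scale the imm7 by 8
  return CSStackSizeInc % Scale == 0 &&
         CSStackSizeInc >= -64 * Scale && // -512
         CSStackSizeInc <= 63 * Scale;    //  504
}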
@@ -1053,16 +1129,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
if (MFnI.shouldSignReturnAddress()) {
+
+ unsigned PACI;
if (MFnI.shouldSignWithBKey()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
- .setMIFlag(MachineInstr::FrameSetup);
+ PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP;
} else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
- .setMIFlag(MachineInstr::FrameSetup);
+ PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP;
}
+ auto MI = BuildMI(MBB, MBBI, DL, TII->get(PACI));
+ if (Subtarget.hasPAuth())
+ MI.addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP, RegState::InternalRead);
+ MI.setMIFlag(MachineInstr::FrameSetup);
+
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -1070,6 +1153,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
}
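At the source level, return-address signing is driven by per-function attributes; a hedged example using Clang's branch-protection controls (the target-attribute spelling is an assumption here, as documented for AArch64):

// Built with -mbranch-protection=pac-ret (A key) or pac-ret+b-key, or
// opted in per function; shouldSignWithBKey() reflects the b-key choice.
__attribute__((target("branch-protection=pac-ret+b-key")))
int protectedCaller(int (*fn)(int), int x) {
  return fn(x) + 1; // non-leaf, so LR is signed in the prologue
}

On FEAT_PAuth targets the patch emits the explicit-operand form (e.g. pacib x30, sp) instead of the hint-space pacibsp; the semantics are the same, LR signed with SP as the modifier.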
+ // We signal the presence of a Swift extended frame to external tools by
+ // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
+ // ORR is sufficient; a Swift kernel is assumed to initialize the TBI bits so
+ // that this remains true.
+ if (HasFP && AFI->hasSwiftAsyncContext()) {
+ // ORR x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
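Worked bit arithmetic for the marker (illustrative helpers; only the masks come from the patch): 0b0001 in bits 63:60 is exactly bit 60, and the odd-looking immediates are its AArch64 logical-immediate encodings.

#include <cstdint>
constexpr uint64_t kSwiftExtendedFrameBit = 1ULL << 60; // 0x1000_0000_0000_0000
uint64_t tagFP(uint64_t fp) { return fp | kSwiftExtendedFrameBit; }    // the ORR
uint64_t untagFP(uint64_t fp) { return fp & ~kSwiftExtendedFrameBit; } // the BIC
// 0x1100 (ORRXri) and 0x10fe (ANDXri) in the BuildMI calls are this mask,
// and its inverse, in encoded logical-immediate form.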
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
@@ -1139,12 +1234,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
+ } else if (HomPrologEpilog) {
+ // The stack has already been adjusted.
+ NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
@@ -1172,13 +1271,35 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (AFI->hasSwiftAsyncContext()) {
+ // Before we update the live FP we have to ensure there's a valid (or
+ // null) asynchronous context in its slot just before FP in the frame
+ // record, so store it now.
+ const auto &Attrs = MF.getFunction().getAttributes();
+ bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
+ if (HaveInitialContext)
+ MBB.addLiveIn(AArch64::X22);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
+ .addUse(HaveInitialContext ? AArch64::X22 : AArch64::XZR)
+ .addUse(AArch64::SP)
+ .addImm(FPOffset - 8)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ if (HomPrologEpilog) {
+ auto Prolog = MBBI;
+ --Prolog;
+ assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+ Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+ } else {
+ // Issue sub fp, sp, FPOffset or
+ // mov fp,sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ }
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1306,7 +1427,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
- !IsFunclet && RegInfo->needsStackRealignment(MF);
+ !IsFunclet && RegInfo->hasStackRealignment(MF);
unsigned scratchSPReg = AArch64::SP;
if (NeedsRealignment) {
@@ -1561,9 +1682,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consistency with the prologue. Note that
- // in the epilogue, the residual adjustment is executed first.
- uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB);
+ // How much of the stack used by incoming arguments this function is expected
+ // to restore in this particular epilogue.
+ int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
// The stack frame should be like below,
//
@@ -1598,7 +1719,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
- uint64_t AfterCSRPopSize = ArgumentPopSize;
+ int64_t AfterCSRPopSize = ArgumentStackToRestore;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// We cannot rely on the local stack size set in emitPrologue if the function
// has funclets, as funclets have different local stack size requirements, and
@@ -1606,6 +1727,25 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
+ if (homogeneousPrologEpilog(MF, &MBB)) {
+ assert(!NeedsWinCFI);
+ auto LastPopI = MBB.getFirstTerminator();
+ if (LastPopI != MBB.begin()) {
+ auto HomogeneousEpilog = std::prev(LastPopI);
+ if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
+ LastPopI = HomogeneousEpilog;
+ }
+
+ // Adjust local stack
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(AFI->getLocalStackSize()), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
+
+ // SP has already been adjusted while restoring the callee-saved regs.
+ // The case that adjusts SP for arguments was bailed out of earlier.
+ assert(AfterCSRPopSize == 0);
+ return;
+ }
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
@@ -1616,8 +1756,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
- // If the offset is 0, convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0)
+ // If the offset is 0 and the AfterCSR pop is not actually trying to
+ // allocate more stack for arguments (in space that an untimely interrupt
+ // may clobber), convert it to a post-index ldp.
+ if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
else {
@@ -1657,6 +1799,18 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameDestroy);
}
+ if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+ // We need to reset FP to its untagged state on return. Bit 60 is currently
+ // used to show the presence of an extended frame.
+
+ // BIC x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
+ AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x10fe)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+
const StackOffset &SVEStackSize = getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
@@ -1776,6 +1930,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
if (AfterCSRPopSize) {
+ assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
+ "interrupt may have clobbered");
// Find an insertion point for the first ldp so that it goes before the
// shadow call stack epilog instruction. This ensures that the restore of
// lr from x18 is placed after the restore from sp.
@@ -1791,7 +1947,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
+ StackOffset::getFixed(AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
if (HasWinCFI)
@@ -1893,13 +2049,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
+ } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
// References to the CSR area must use FP if we're re-aligning the stack
// since the dynamically-sized alignment padding is between the SP/BP and
// the CSR area.
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
UseFP = true;
- } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
// If the FPOffset is negative and we're producing a signed immediate, we
// have to keep in mind that the available offset range for negative
// offsets is smaller than for positive ones. If an offset is available
@@ -1941,9 +2097,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
}
}
- assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
- "In the presence of dynamic stack pointer realignment, "
- "non-argument/CSR objects cannot be accessed through the frame pointer");
+ assert(
+ ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
StackOffset FPOffset =
@@ -1953,10 +2110,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
- if (hasFP(MF) &&
- (SPOffset.getFixed() ||
- FPOffset.getScalable() < SPOffset.getScalable() ||
- RegInfo->needsStackRealignment(MF))) {
+ if (hasFP(MF) && (SPOffset.getFixed() ||
+ FPOffset.getScalable() < SPOffset.getScalable() ||
+ RegInfo->hasStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
@@ -2009,7 +2165,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
AttributeList Attrs = MF.getFunction().getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
- Attrs.hasAttrSomewhere(Attribute::SwiftError));
+ Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
+ MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -2123,6 +2280,7 @@ static void computeCalleeSaveRegisterPairs(
FirstReg = Count - 1;
}
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
+ bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -2221,22 +2379,27 @@ static void computeCalleeSaveRegisterPairs(
else
ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
+ // Swift's async context is directly before FP, so allocate an extra
+ // 8 bytes for it.
+ if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
+ RPI.Reg2 == AArch64::FP)
+ ByteOffset += StackFillDir * 8;
+
assert(!(RPI.isScalable() && RPI.isPaired()) &&
"Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
- if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
+ if (NeedGapToAlignStack && !NeedsWinCFI &&
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
- !RPI.isPaired()) {
+ !RPI.isPaired() && ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
- assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
// A stack frame with a gap looks like this, bottom up:
// d9, d8. x21, gap, x20, x19.
- // Set extra alignment on the x21 object (the only unpaired register)
- // to create the gap above it.
+ // Set extra alignment on the x21 object to create the gap above it.
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
+ NeedGapToAlignStack = false;
}
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
@@ -2244,6 +2407,12 @@ static void computeCalleeSaveRegisterPairs(
// If filling top down (default), we want the offset after incrementing it.
// If filling bottom up (WinCFI) we need the original offset.
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
+
+ // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
+ // Swift context can directly precede FP.
+ if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
+ RPI.Reg2 == AArch64::FP)
+ Offset += 8;
RPI.Offset = Offset / Scale;
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
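The resulting extended frame record, modelled as a host-side struct for clarity (hypothetical; offsets follow the usual AArch64 frame-record convention plus the patch's fp - 8 context slot):

#include <cstdint>
// Laid out so the struct begins at fp - 8:
struct ExtendedFrameRecord {
  uint64_t asyncContext; // [fp - 8]  Swift asynchronous context pointer
  uint64_t savedFP;      // [fp + 0]  prev_fp, bit 60 tagged when extended
  uint64_t savedLR;      // [fp + 8]  prev_lr
};
// Hence the 24-byte slot, with the FP/LR pair placed 8 bytes into it.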
@@ -2324,6 +2493,22 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(AArch64::X18);
}
+ if (homogeneousPrologEpilog(MF)) {
+ auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ for (auto &RPI : RegPairs) {
+ MIB.addReg(RPI.Reg1);
+ MIB.addReg(RPI.Reg2);
+
+ // Update register live-ins.
+ if (!MRI.isReserved(RPI.Reg1))
+ MBB.addLiveIn(RPI.Reg1);
+ if (!MRI.isReserved(RPI.Reg2))
+ MBB.addLiveIn(RPI.Reg2);
+ }
+ return true;
+ }
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
@@ -2519,6 +2704,14 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
for (const RegPairInfo &RPI : reverse(RegPairs))
if (!RPI.isScalable())
EmitMI(RPI);
+ } else if (homogeneousPrologEpilog(MF, &MBB)) {
+ auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ for (auto &RPI : RegPairs) {
+ MIB.addReg(RPI.Reg1, RegState::Define);
+ MIB.addReg(RPI.Reg2, RegState::Define);
+ }
+ return true;
} else
for (const RegPairInfo &RPI : RegPairs)
if (!RPI.isScalable())
@@ -2588,7 +2781,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
!SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
@@ -2667,7 +2860,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
- if (produceCompactUnwindFrame(MF))
+ if (producePairRegisters(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPR;
}
@@ -2688,6 +2881,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Adding the size of additional 64bit GPR saves.
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+
+ // A Swift asynchronous context extends the frame record with a pointer
+ // directly before FP.
+ if (hasFP(MF) && AFI->hasSwiftAsyncContext())
+ CSStackSize += 8;
+
uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
<< EstimatedStackSize + AlignedCSStackSize
@@ -2705,8 +2904,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
- MachineFunction &MF, const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF, const TargetRegisterInfo *RegInfo,
+ std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
@@ -2715,8 +2915,35 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
// the top, thus have the CSI array start from the highest registers.)
if (NeedsWinCFI)
std::reverse(CSI.begin(), CSI.end());
- // Let the generic code do the rest of the setup.
- return false;
+
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ for (auto &CS : CSI) {
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ unsigned Size = RegInfo->getSpillSize(*RC);
+ Align Alignment(RegInfo->getSpillAlign(*RC));
+ int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
+ CS.setFrameIdx(FrameIdx);
+
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+
+ // Grab 8 bytes below FP for the extended asynchronous frame info.
+ if (hasFP(MF) && AFI->hasSwiftAsyncContext() && Reg == AArch64::FP) {
+ FrameIdx = MFI.CreateStackObject(8, Alignment, true);
+ AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ }
+ }
+ return true;
}
bool AArch64FrameLowering::enableStackSlotScavenging(