Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  818
1 file changed, 611 insertions, 207 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4d5676f34101..caab59201a8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -269,14 +269,10 @@ STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
static int64_t getArgumentStackToRestore(MachineFunction &MF,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- bool IsTailCallReturn = false;
- if (MBB.end() != MBBI) {
- unsigned RetOpcode = MBBI->getOpcode();
- IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri ||
- RetOpcode == AArch64::TCRETURNriBTI;
- }
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool IsTailCallReturn = (MBB.end() != MBBI)
+ ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
+ : false;
int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
@@ -300,7 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
-static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -331,6 +327,27 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
if (Exit && getArgumentStackToRestore(MF, *Exit))
return false;
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ if (AFI->hasSwiftAsyncContext())
+ return false;
+
+ // If there is an odd number of GPRs before LR and FP in the CSRs list,
+ // they will not be paired into one RegPairInfo, which is incompatible with
+ // the assumption made by the homogeneous prolog epilog pass.
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+ unsigned NumGPRs = 0;
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ Register Reg = CSRegs[I];
+ if (Reg == AArch64::LR) {
+ assert(CSRegs[I + 1] == AArch64::FP);
+ if (NumGPRs % 2 != 0)
+ return false;
+ break;
+ }
+ if (AArch64::GPR64RegClass.contains(Reg))
+ ++NumGPRs;
+ }
+
return true;
}
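As a concrete illustration of the parity rule above: with a hypothetical callee-saved order of X19, X20, X21, LR, FP, three GPR64s precede LR, so the pairing assumed by the homogeneous prolog/epilog pass breaks and the optimization is skipped. A standalone sketch of the same check (illustrative only, not backend code):

  #include <string>
  #include <vector>

  // Returns false when an odd number of GPR64 CSRs precede LR, mirroring the
  // bail-out above; register names are plain strings in this sketch.
  static bool gprsBeforeLRPairCleanly(const std::vector<std::string> &CSRs) {
    unsigned NumGPRs = 0;
    for (const std::string &R : CSRs) {
      if (R == "LR")
        return NumGPRs % 2 == 0;
      if (!R.empty() && R[0] == 'X') // treat X<n> as GPR64 in this sketch
        ++NumGPRs;
    }
    return true;
  }
  // gprsBeforeLRPairCleanly({"X19", "X20", "X21", "LR", "FP"}) == false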
@@ -427,6 +444,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
// Win64 EH requires a frame pointer if funclets are present, as the locals
// are accessed off the frame pointer in both the parent function and the
// funclets.
@@ -461,6 +479,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // The stack probing code for the dynamically allocated outgoing arguments
+ // area assumes that the stack is probed at the top - either by the prologue
+ // code, which issues a probe if `hasVarSizedObjects` returns true, or by the
+ // most recent variable-sized object allocation. Changing the condition here
+ // may need to be followed up by changes to the probe issuing logic.
return !MF.getFrameInfo().hasVarSizedObjects();
}
@@ -469,6 +492,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const AArch64TargetLowering *TLI =
+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
+ [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
@@ -495,8 +521,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(Amount), TII);
+
+ if (TLI->hasInlineStackProbe(MF) &&
+ -Amount >= AArch64::StackProbeMaxUnprobedStack) {
+ // When stack probing is enabled, the decrement of SP may need to be
+ // probed. We only need to do this if the call site needs 1024 bytes of
+ // space or more, because a region smaller than that is allowed to be
+ // unprobed at an ABI boundary. We rely on the fact that SP has been
+ // probed exactly at this point, either by the prologue or most recent
+ // dynamic allocation.
+ assert(MFI.hasVarSizedObjects() &&
+ "non-reserved call frame without var sized objects?");
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
+ } else {
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(Amount), TII);
+ }
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
@@ -612,7 +654,7 @@ void AArch64FrameLowering::resetCFIToInitialState(
}
// Shadow call stack uses X18, reset it.
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (MFI.needsShadowCallStackPrologueEpilogue(MF))
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(AArch64::X18, true));
@@ -671,6 +713,153 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}
+// Return the maximum possible number of bytes for `Size` due to the
+// architectural limit on the size of an SVE register.
+static int64_t upperBound(StackOffset Size) {
+ static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+ return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
+}
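For intuition about the constant: vscale on AArch64 is at most 16 (a 2048-bit SVE register versus the 128-bit minimum), so each scalable byte can expand to at most 16 real bytes. A standalone sketch of the same bound (illustrative only, not part of the patch):

  #include <cstdint>

  // Worst-case byte count for a frame with ScalableBytes (which scale with
  // vscale <= 16) plus FixedBytes (which do not).
  static int64_t upperBoundSketch(int64_t ScalableBytes, int64_t FixedBytes) {
    return ScalableBytes * 16 + FixedBytes;
  }
  // E.g. one SVE vector spill (16 scalable bytes) plus 64 fixed bytes occupies
  // at most 16 * 16 + 64 = 320 bytes at run time.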
+
+void AArch64FrameLowering::allocateStackSpace(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
+ bool FollowupAllocs) const {
+
+ if (!AllocSize)
+ return;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+
+ if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
+ Register TargetReg = RealignmentPadding
+ ? findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ // SUB Xd/SP, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+
+ if (RealignmentPadding) {
+ // AND SP, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ return;
+ }
+
+ //
+ // Stack probing allocation.
+ //
+
+ // Fixed length allocation. If we don't need to re-align the stack and don't
+ // have SVE objects, we can use a more efficient sequence for stack probing.
+ if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
+ Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(ScratchReg != AArch64::NoRegister);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
+ .addDef(ScratchReg)
+ .addImm(AllocSize.getFixed())
+ .addImm(InitialOffset.getFixed())
+ .addImm(InitialOffset.getScalable());
+ // The fixed allocation may leave unprobed bytes at the top of the
+ // stack. If we have subsequent allocations (e.g. if we have variable-sized
+ // objects), we need to issue an extra probe, so these allocations start in
+ // a known state.
+ if (FollowupAllocs) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Variable length allocation.
+
+ // If the (unknown) allocation size cannot exceed the probe size, decrement
+ // the stack pointer right away.
+ int64_t ProbeSize = AFI.getStackProbeSize();
+ if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
+ Register ScratchReg = RealignmentPadding
+ ? findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ assert(ScratchReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND SP, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+ }
+ if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
+ AArch64::StackProbeMaxUnprobedStack) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
+ // each of them guaranteed to adjust the stack by less than the probe size.
+ Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(TargetReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND Xn, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
+ .addReg(TargetReg);
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg =
+ Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ if (RealignmentPadding)
+ AFI.setStackRealigned(true);
+}
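For context, the probing paths above are only taken for functions that opt in. Assuming the usual LLVM convention that hasInlineStackProbe and getStackProbeSize consult the "probe-stack" and "stack-probe-size" function attributes (the removed Windows-only code later in this diff read "stack-probe-size" directly), a front end could request this behaviour roughly as follows (sketch, not part of the patch):

  #include "llvm/IR/Function.h"

  // Hypothetical helper: ask the backend to emit inline stack probes with a
  // 4 KiB probe interval for the given function.
  static void requestInlineStackProbes(llvm::Function &F) {
    F.addFnAttr("probe-stack", "inline-asm");
    F.addFnAttr("stack-probe-size", "4096");
  }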
+
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -785,16 +974,11 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
// Zero out GPRs.
for (MCRegister Reg : GPRsToZero.set_bits())
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0);
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
// Zero out FP/vector registers.
for (MCRegister Reg : FPRsToZero.set_bits())
- if (HasSVE)
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg)
- .addImm(0)
- .addImm(0);
- else
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0);
+ TII.buildClearRegister(Reg, MBB, MBBI, DL);
if (HasSVE) {
for (MCRegister PReg :
@@ -808,6 +992,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}
+static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
+ const MachineBasicBlock &MBB) {
+ const MachineFunction *MF = MBB.getParent();
+ LiveRegs.addLiveIns(MBB);
+ // Mark callee saved registers as used so we will not choose them.
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ LiveRegs.addReg(CSRegs[i]);
+}
+
// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
@@ -829,12 +1023,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
- LiveRegs.addLiveIns(*MBB);
-
- // Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
- for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ getLiveRegsForEntryMBB(LiveRegs, *MBB);
// Prefer X9 since it was historically used for the prologue scratch reg.
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -854,9 +1043,24 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
+ const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ LivePhysRegs LiveRegs(TRI);
+ getLiveRegsForEntryMBB(LiveRegs, MBB);
+ // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
+ // available.
+ if (!LiveRegs.available(MRI, AArch64::X16) ||
+ !LiveRegs.available(MRI, AArch64::X17))
+ return false;
+ }
- // Don't need a scratch register if we're not going to re-align the stack.
- if (!RegInfo->hasStackRealignment(*MF))
+ // Don't need a scratch register if we're not going to re-align the stack or
+ // emit stack probes.
+ if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
@@ -866,15 +1070,11 @@ bool AArch64FrameLowering::canUseAsPrologue(
static bool windowsRequiresStackProbe(MachineFunction &MF,
uint64_t StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- if (!Subtarget.isTargetWindows())
- return false;
- const Function &F = MF.getFunction();
+ const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
// TODO: When implementing stack protectors, take that into account
// for the probe threshold.
- unsigned StackProbeSize =
- F.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
- return (StackSizeInBytes >= StackProbeSize) &&
- !F.hasFnAttribute("no-stack-arg-probe");
+ return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
+ StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
}
static bool needsWinCFI(const MachineFunction &MF) {
@@ -1163,8 +1363,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
- TypeSize Scale = TypeSize::Fixed(1);
- unsigned Width;
+ TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
@@ -1290,19 +1489,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
-static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) {
- if (!(llvm::any_of(
- MF.getFrameInfo().getCalleeSavedInfo(),
- [](const auto &Info) { return Info.getReg() == AArch64::LR; }) &&
- MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)))
- return false;
-
- if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
- report_fatal_error("Must reserve x18 to use shadow call stack");
-
- return true;
-}
-
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1385,6 +1571,20 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
}
+#ifndef NDEBUG
+/// Collect live registers from the end of \p MI's parent up to (and including)
+/// \p MI in \p LiveRegs.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LivePhysRegs &LiveRegs) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI :
+ reverse(make_range(MI.getIterator(), MBB.instr_end())))
+ LiveRegs.stepBackward(MI);
+}
+#endif
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1393,6 +1593,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
@@ -1402,6 +1603,40 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HasWinCFI = false;
auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
+ MachineBasicBlock::iterator End = MBB.end();
+#ifndef NDEBUG
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ // Collect live registers from the end of MBB up to the start of the existing
+ // frame setup instructions.
+ MachineBasicBlock::iterator NonFrameStart = MBB.begin();
+ while (NonFrameStart != End &&
+ NonFrameStart->getFlag(MachineInstr::FrameSetup))
+ ++NonFrameStart;
+
+ LivePhysRegs LiveRegs(*TRI);
+ if (NonFrameStart != MBB.end()) {
+ getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
+ // Ignore registers used for stack management for now.
+ LiveRegs.removeReg(AArch64::SP);
+ LiveRegs.removeReg(AArch64::X19);
+ LiveRegs.removeReg(AArch64::FP);
+ LiveRegs.removeReg(AArch64::LR);
+ }
+
+ auto VerifyClobberOnExit = make_scope_exit([&]() {
+ if (NonFrameStart == MBB.end())
+ return;
+ // Check if any of the newly inserted instructions clobber any of the live registers.
+ for (MachineInstr &MI :
+ make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
+ for (auto &Op : MI.operands())
+ if (Op.isReg() && Op.isDef())
+ assert(!LiveRegs.contains(Op.getReg()) &&
+ "live register clobbered by inserted prologue instructions");
+ }
+ });
+#endif
+
bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
@@ -1414,35 +1649,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc DL;
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
MFnI.needsDwarfUnwindInfo(MF));
if (MFnI.shouldSignReturnAddress(MF)) {
- if (MFnI.shouldSignWithBKey()) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // No SEH opcode for this one; it doesn't materialize into an
- // instruction on Windows.
- BuildMI(MBB, MBBI, DL,
- TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP
- : AArch64::PACIASP))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
.setMIFlag(MachineInstr::FrameSetup);
-
- if (EmitCFI) {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- } else if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ if (NeedsWinCFI)
+ HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
}
+
if (EmitCFI && MFnI.isMTETagged()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
.setMIFlag(MachineInstr::FrameSetup);
@@ -1461,10 +1678,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
.addExternalSymbol("swift_async_extendedFramePointerFlags",
AArch64II::MO_GOT);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
.addUse(AArch64::FP)
.addUse(AArch64::X16)
.addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
break;
}
[[fallthrough]];
@@ -1475,6 +1702,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addUse(AArch64::FP)
.addImm(0x1100)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
break;
case SwiftAsyncFramePointerMode::Never:
@@ -1573,7 +1805,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Move past the saves of the callee-saved registers, fixing up the offsets
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
- MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
@@ -1598,11 +1829,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
+ Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
- .addUse(HaveInitialContext ? AArch64::X22 : AArch64::XZR)
+ .addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+ // to multiple instructions, should be mutually exclusive.
+ assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
}
if (HomPrologEpilog) {
@@ -1639,7 +1879,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
- int64_t RealignmentPadding =
+ const int64_t RealignmentPadding =
(NeedsRealignment && MFI.getMaxAlign() > Align(16))
? MFI.getMaxAlign().value() - 16
: 0;
@@ -1769,12 +2009,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+ << "\n");
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1782,67 +2024,34 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
- AllocateAfter = SVEStackSize - AllocateBefore;
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}
// Allocate space for the callee saves (if any).
- emitFrameOffset(
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
- MachineInstr::FrameSetup, false, false, nullptr,
- EmitAsyncCFI && !HasFP && AllocateBefore,
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+ StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
- // Finally allocate remaining SVE stack space.
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
- AllocateBefore + StackOffset::getFixed(
- (int64_t)MFI.getStackSize() - NumBytes));
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- unsigned scratchSPReg = AArch64::SP;
-
- if (NeedsRealignment) {
- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(scratchSPReg != AArch64::NoRegister);
- }
-
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- emitFrameOffset(
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
- }
- if (NeedsRealignment) {
- assert(MFI.getMaxAlign() > Align(1));
- assert(scratchSPReg != AArch64::SP);
-
- // SUB X9, SP, NumBytes
- // -- X9 is temporary register, so shouldn't contain any live data here,
- // -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset, MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1901,54 +2110,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
-static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
- bool NeedsWinCFI, bool *HasWinCFI) {
- const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
- if (!MFI.shouldSignReturnAddress(MF))
- return;
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- bool EmitAsyncCFI = MFI.needsAsyncDwarfUnwindInfo(MF);
-
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- // The AUTIASP instruction assembles to a hint instruction before v8.3a so
- // this instruction can safely used for any v8a architecture.
- // From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead. In this case the
- // DW_CFA_AARCH64_negate_ra_state can't be emitted.
- if (Subtarget.hasPAuth() &&
- !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack) &&
- MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR &&
- !NeedsWinCFI) {
- BuildMI(MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
- .copyImplicitOps(*MBBI);
- MBB.erase(MBBI);
- } else {
- BuildMI(
- MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
- .setMIFlag(MachineInstr::FrameDestroy);
-
- if (EmitAsyncCFI) {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameDestroy);
- }
- if (NeedsWinCFI) {
- *HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
- .setMIFlag(MachineInstr::FrameDestroy);
- }
- }
-}
-
static bool isFuncletReturnInstr(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
@@ -1963,36 +2124,50 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool NeedsWinCFI = needsWinCFI(MF);
- bool EmitCFI =
- MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
+ bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
- auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
IsFunclet = isFuncletReturnInstr(*MBBI);
}
+ MachineBasicBlock::iterator EpilogStartI = MBB.end();
+
auto FinishingTouches = make_scope_exit([&]() {
- InsertReturnAddressAuth(MF, MBB, NeedsWinCFI, &HasWinCFI);
- if (needsShadowCallStackPrologueEpilogue(MF))
+ if (AFI->shouldSignReturnAddress(MF)) {
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::PAUTH_EPILOGUE))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsWinCFI)
+ HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
+ }
+ if (AFI->needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
if (EmitCFI)
emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
- if (HasWinCFI)
+ if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
+ if (!MF.hasWinCFI())
+ MF.setHasWinCFI(true);
+ }
+ if (NeedsWinCFI) {
+ assert(EpilogStartI != MBB.end());
+ if (!HasWinCFI)
+ MBB.erase(EpilogStartI);
+ }
});
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -2026,7 +2201,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Adjust local stack
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getLocalStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// SP has been already adjusted while restoring callee save regs.
// We've bailed-out the case with adjusting SP for arguments.
@@ -2078,16 +2253,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NeedsWinCFI, &HasWinCFI);
}
- if (MF.hasWinCFI()) {
- // If the prologue didn't contain any SEH opcodes and didn't set the
- // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
- // EpilogStart - to avoid generating CFI for functions that don't need it.
- // (And as we didn't generate any prologue at all, it would be asymmetrical
- // to the epilogue.) By the end of the function, we assert that
- // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
- HasWinCFI = true;
+ if (NeedsWinCFI) {
+ // Note that there are cases where we insert SEH opcodes in the
+ // epilogue when we had no SEH opcodes in the prologue. For
+ // example, when there is no stack frame but there are stack
+ // arguments. Insert the SEH_EpilogStart and remove it later if
+ // we didn't emit any SEH opcodes to avoid generating WinCFI for
+ // functions that don't need it.
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
+ EpilogStartI = LastPopI;
+ --EpilogStartI;
}
if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
@@ -2107,6 +2283,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
.addUse(AArch64::FP)
.addImm(0x10fe)
.setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameDestroy);
+ HasWinCFI = true;
+ }
break;
case SwiftAsyncFramePointerMode::Never:
@@ -2241,11 +2422,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
- TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && hasFP(MF)) {
@@ -2702,7 +2883,8 @@ static void computeCalleeSaveRegisterPairs(
// Swift's async context is directly before FP, so allocate an extra
// 8 bytes for it.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
- RPI.Reg2 == AArch64::FP)
+ ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg2 == AArch64::LR)))
ByteOffset += StackFillDir * 8;
assert(!(RPI.isScalable() && RPI.isPaired()) &&
@@ -2725,13 +2907,14 @@ static void computeCalleeSaveRegisterPairs(
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPost % Scale == 0);
// If filling top down (default), we want the offset after incrementing it.
- // If fillibg bootom up (WinCFI) we need the original offset.
+ // If filling bottom up (WinCFI) we need the original offset.
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
// The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
// Swift context can directly precede FP.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
- RPI.Reg2 == AArch64::FP)
+ ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg2 == AArch64::LR)))
Offset += 8;
RPI.Offset = Offset / Scale;
@@ -2788,7 +2971,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Update register live in.
if (!MRI.isReserved(RPI.Reg1))
MBB.addLiveIn(RPI.Reg1);
- if (!MRI.isReserved(RPI.Reg2))
+ if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
MBB.addLiveIn(RPI.Reg2);
}
return true;
@@ -3038,6 +3221,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
: (unsigned)AArch64::NoRegister;
unsigned ExtraCSSpill = 0;
+ bool HasUnpairedGPR64 = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -3048,10 +3232,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = AArch64::NoRegister;
- if (AArch64::GPR64RegClass.contains(Reg) ||
- AArch64::FPR64RegClass.contains(Reg) ||
- AArch64::FPR128RegClass.contains(Reg))
- PairedReg = CSRegs[i ^ 1];
+ const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
+ if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR128RegClass.contains(Reg)) {
+ // Compensate for odd numbers of GP CSRs.
+ // For now, all known cases of an odd number of CSRs involve GPRs.
+ if (HasUnpairedGPR64)
+ PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
+ else
+ PairedReg = CSRegs[i ^ 1];
+ }
+
+ // If the function requires saving all the GP registers (SavedRegs), and
+ // there is an odd number of GP CSRs at the same time (CSRegs), PairedReg
+ // could be in a different register class from Reg, which would lead to an
+ // FPR (usually D8) accidentally being marked as saved.
+ if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
+ PairedReg = AArch64::NoRegister;
+ HasUnpairedGPR64 = true;
+ }
+ assert(PairedReg == AArch64::NoRegister ||
+ AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
+ AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
+ AArch64::FPR128RegClass.contains(Reg, PairedReg));
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
@@ -3150,12 +3353,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
+ ExtraCSSpill = UnspilledCSGPR;
+
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
- if (producePairRegisters(MF))
- SavedRegs.set(UnspilledCSGPRPaired);
- ExtraCSSpill = UnspilledCSGPR;
+ if (producePairRegisters(MF)) {
+ if (UnspilledCSGPRPaired == AArch64::NoRegister) {
+ // Failed to make a pair for compact unwind format, revert spilling.
+ if (produceCompactUnwindFrame(MF)) {
+ SavedRegs.reset(UnspilledCSGPR);
+ ExtraCSSpill = AArch64::NoRegister;
+ }
+ } else
+ SavedRegs.set(UnspilledCSGPRPaired);
+ }
}
// If we didn't find an extra callee-saved register to spill, create
@@ -3252,6 +3464,12 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // If the function has streaming-mode changes, don't scavenge a
+ // spillslot in the callee-save area, as that might require an
+ // 'addvl' in the streaming-mode-changing call-sequence when the
+ // function doesn't use an FP.
+ if (AFI->hasStreamingModeChanges() && !hasFP(MF))
+ return false;
return AFI->hasCalleeSaveStackFreeSpace();
}
@@ -3412,7 +3630,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// function.
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
- RS->backward(std::prev(MBBI));
+ RS->backward(MBBI);
Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
@@ -3775,7 +3993,26 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
// New code will be inserted after the last tagging instruction we've found.
MachineBasicBlock::iterator InsertI = Instrs.back().MI;
+
+ // All the gathered stack tag instructions are merged and placed after the
+ // last tag store in the list. Before inserting, we must check whether the
+ // NZCV flag is live at that point, since it might get clobbered if any STG
+ // loops are present in the merged code.
+
+ // FIXME: This approach of bailing out of the merge is conservative: the
+ // liveness check is performed even when no STG loops would be emitted after
+ // merging the list, in which case it is not needed.
+ LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
+ LiveRegs.addLiveOuts(*MBB);
+ for (auto I = MBB->rbegin();; ++I) {
+ MachineInstr &MI = *I;
+ if (MI == InsertI)
+ break;
+ LiveRegs.stepBackward(*I);
+ }
InsertI++;
+ if (LiveRegs.contains(AArch64::NZCV))
+ return InsertI;
llvm::stable_sort(Instrs,
[](const TagStoreInstr &Left, const TagStoreInstr &Right) {
@@ -4024,3 +4261,170 @@ void AArch64FrameLowering::orderFrameObjects(
dbgs() << "\n";
});
}
+
+/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
+/// least every ProbeSize bytes. Returns an iterator of the first instruction
+/// after the loop. The difference between SP and TargetReg must be an exact
+/// multiple of ProbeSize.
+MachineBasicBlock::iterator
+AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
+ MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
+ Register TargetReg) const {
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
+ // in SUB).
+ emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup);
+ // STR XZR, [SP]
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ // CMP SP, TargetReg
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+ // B.CC Loop
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(LoopMBB)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(LoopMBB);
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+
+ return ExitMBB->begin();
+}
+
+void AArch64FrameLowering::inlineStackProbeFixed(
+ MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
+ StackOffset CFAOffset) const {
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ bool HasFP = hasFP(MF);
+
+ DebugLoc DL;
+ int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+ int64_t NumBlocks = FrameSize / ProbeSize;
+ int64_t ResidualSize = FrameSize % ProbeSize;
+
+ LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
+ << NumBlocks << " blocks of " << ProbeSize
+ << " bytes, plus " << ResidualSize << " bytes\n");
+
+ // Decrement SP by NumBlocks * ProbeSize bytes, either unrolled or with an
+ // ordinary loop.
+ if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
+ for (int i = 0; i < NumBlocks; ++i) {
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
+ // encodable in a SUB).
+ emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize);
+ // STR XZR, [SP]
+ BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ } else if (NumBlocks != 0) {
+ // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
+ // encodable in ADD). ScratchReg may temporarily become the CFA register.
+ emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
+ MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
+ MBB = MBBI->getParent();
+ if (EmitAsyncCFI && !HasFP) {
+ // Set the CFA register back to SP.
+ const AArch64RegisterInfo &RegInfo =
+ *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+
+ if (ResidualSize != 0) {
+ // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
+ // in SUB).
+ emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ResidualSize), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
+ // STR XZR, [SP]
+ BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+}
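To make the split above concrete, a small worked example with hypothetical numbers (the 1024-byte threshold is the AArch64::StackProbeMaxUnprobedStack limit referred to in the comments earlier in this diff):

  // A 10000-byte frame with a 4096-byte probe size:
  int64_t FrameSize = 10000, ProbeSize = 4096;
  int64_t NumBlocks = FrameSize / ProbeSize;    // 2 fully probed 4096-byte blocks
  int64_t ResidualSize = FrameSize % ProbeSize; // 1808 bytes remain
  // 1808 > 1024, so the residual allocation is followed by one more
  // "STR XZR, [SP]" probe; a residual of, say, 512 bytes would stay unprobed.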
+
+void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // Get the instructions that need to be replaced. We emit at most two of
+ // these. Remember them in order to avoid complications coming from the need
+ // to traverse the block while potentially creating more blocks.
+ SmallVector<MachineInstr *, 4> ToReplace;
+ for (MachineInstr &MI : MBB)
+ if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
+ MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
+ ToReplace.push_back(&MI);
+
+ for (MachineInstr *MI : ToReplace) {
+ if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
+ Register ScratchReg = MI->getOperand(0).getReg();
+ int64_t FrameSize = MI->getOperand(1).getImm();
+ StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
+ MI->getOperand(3).getImm());
+ inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
+ CFAOffset);
+ } else {
+ assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
+ "Stack probe pseudo-instruction expected");
+ const AArch64InstrInfo *TII =
+ MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
+ Register TargetReg = MI->getOperand(0).getReg();
+ (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
+ }
+ MI->eraseFromParent();
+ }
+}