Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  702
1 file changed, 582 insertions(+), 120 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ea3e800a1ad20..efa3fd5ca9cef 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -170,8 +170,45 @@ static cl::opt<bool>
cl::desc("reverse the CSR restore sequence"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> StackTaggingMergeSetTag(
+ "stack-tagging-merge-settag",
+ cl::desc("merge settag instruction in function epilog"), cl::init(true),
+ cl::Hidden);
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+/// Returns the argument pop size.
+static uint64_t getArgumentPopSize(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ bool IsTailCallReturn = false;
+ if (MBB.end() != MBBI) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ }
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ uint64_t ArgumentPopSize = 0;
+ if (IsTailCallReturn) {
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments, this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = AFI->getArgumentStackToRestore();
+ }
+
+ return ArgumentPopSize;
+}
+
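For illustration, a minimal standalone sketch of the pop-size selection implemented by the new helper (hypothetical names; a std::optional stands in for the tail-call stack-adjust operand, this is not the LLVM API):

    #include <cstdint>
    #include <optional>

    // Tail calls in callee-pops-arguments conventions carry the exact
    // adjustment computed by LowerCall; all other returns pop the whole
    // argument area cached by LowerFormalArguments (zero for the C CC).
    uint64_t argumentPopSize(std::optional<int64_t> TailCallStackAdjust,
                             uint64_t ArgumentStackToRestore) {
      if (TailCallStackAdjust)
        return static_cast<uint64_t>(*TailCallStackAdjust);
      return ArgumentStackToRestore;
    }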
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
@@ -211,6 +248,24 @@ AArch64FrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::SVEVector;
}
+/// Returns the size of the fixed object area (allocated next to sp on entry)
+/// On Win64 this may include a var args area and an UnwindHelp object for EH.
+static unsigned getFixedObjectSize(const MachineFunction &MF,
+ const AArch64FunctionInfo *AFI, bool IsWin64,
+ bool IsFunclet) {
+ if (!IsWin64 || IsFunclet) {
+ // Only Win64 uses fixed objects, and then only for the function (not
+ // funclets)
+ return 0;
+ } else {
+ // Var args are stored here in the primary function.
+ const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
+ // To support EH funclets we allocate an UnwindHelp object
+ const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
+ return alignTo(VarArgsArea + UnwindHelpObject, 16);
+ }
+}
+
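A worked model of the new helper, as a standalone sketch with assumed sizes: a 56-byte var-args GPR save area plus the 8-byte UnwindHelp slot gives alignTo(56 + 8, 16) == 64 bytes of fixed objects.

    #include <cstdint>

    // Mirrors the computation above without the LLVM types: only the
    // primary Win64 frame owns fixed objects; funclets get 0.
    uint64_t fixedObjectSize(bool IsWin64, bool IsFunclet,
                             uint64_t VarArgsGPRSize, bool HasEHFunclets) {
      if (!IsWin64 || IsFunclet)
        return 0;
      uint64_t UnwindHelpObject = HasEHFunclets ? 8 : 0;
      return (VarArgsGPRSize + UnwindHelpObject + 15) & ~uint64_t(15);
    }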
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -286,10 +341,8 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
- unsigned Align = getStackAlignment();
-
int64_t Amount = I->getOperand(0).getImm();
- Amount = alignTo(Amount, Align);
+ Amount = alignTo(Amount, getStackAlign());
if (!IsDestroy)
Amount = -Amount;
@@ -480,6 +533,39 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
+ MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+ if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
+ return false;
+
+ if (MBB.empty())
+ return true;
+
+ // Disable combined SP bump if the last instruction is an MTE tag store. It
+ // is almost always better to merge SP adjustment into those instructions.
+ MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
+ MachineBasicBlock::iterator Begin = MBB.begin();
+ while (LastI != Begin) {
+ --LastI;
+ if (LastI->isTransient())
+ continue;
+ if (!LastI->getFlag(MachineInstr::FrameDestroy))
+ break;
+ }
+ switch (LastI->getOpcode()) {
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ return false;
+ default:
+ return true;
+ }
+ llvm_unreachable("unreachable");
+}
+
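The backward scan above can be modeled in isolation (hypothetical Instr record, not the MachineInstr API): walk from the end of the block, skip transient and FrameDestroy instructions, and refuse the combined bump if the scan lands on an MTE tag store, since folding the SP adjustment into the tag store's writeback form is usually better.

    #include <vector>

    struct Instr { bool Transient, FrameDestroy, IsTagStore; };

    // Returns false when the last "real" instruction is a tag store.
    bool combineBumpInEpilogue(const std::vector<Instr> &Block) {
      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        if (I->Transient || I->FrameDestroy)
          continue;
        return !I->IsTagStore;
      }
      return true; // empty or all-FrameDestroy block: combining is fine
    }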
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
@@ -940,11 +1026,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -959,10 +1045,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- // Var args are accounted for in the containing function, so don't
- // include them for funclets.
- unsigned FixedObject = (IsWin64 && !IsFunclet) ?
- alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
@@ -993,32 +1076,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
}
- // The code below is not applicable to funclets. We have emitted all the SEH
- // opcodes that we needed to emit. The FP and BP belong to the containing
- // function.
- if (IsFunclet) {
- if (NeedsWinCFI) {
- HasWinCFI = true;
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // SEH funclets are passed the frame pointer in X1. If the parent
- // function uses the base register, then the base register is used
- // directly, and is not retrieved from X1.
- if (F.hasPersonalityFn()) {
- EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
- if (isAsynchronousEHPersonality(Per)) {
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
- .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
- MBB.addLiveIn(AArch64::X1);
- }
- }
-
- return;
- }
-
- if (HasFP) {
+ // For funclets the FP belongs to the containing function.
+ if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
@@ -1099,7 +1158,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+ BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
@@ -1161,7 +1220,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Allocate space for the rest of the frame.
if (NumBytes) {
- const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
+ // Alignment is required for the parent frame, not the funclet
+ const bool NeedsRealignment =
+ !IsFunclet && RegInfo->needsStackRealignment(MF);
unsigned scratchSPReg = AArch64::SP;
if (NeedsRealignment) {
@@ -1179,8 +1240,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
- const unsigned Alignment = MFI.getMaxAlignment();
- const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
assert(NrBitsToZero > 1);
assert(scratchSPReg != AArch64::SP);
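The switch from countTrailingZeros(getMaxAlignment()) to Log2(MFI.getMaxAlign()) is behavior-preserving. As a standalone sketch of the underlying arithmetic (not the LLVM API): realignment clears the low Log2(MaxAlign) bits of the scratch register, e.g. MaxAlign == 64 clears 6 bits.

    #include <cstdint>

    // MaxAlign must be a power of two; this is the AND-mask realignment the
    // prologue emits as a single ANDXri on the scratch SP copy.
    uint64_t realignDown(uint64_t Addr, uint64_t MaxAlign) {
      return Addr & ~(MaxAlign - 1);
    }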
@@ -1215,7 +1275,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- if (RegInfo->hasBasePointer(MF)) {
+ // For funclets the BP belongs to the containing function.
+ if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
if (NeedsWinCFI) {
@@ -1232,6 +1293,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
+ // SEH funclets are passed the frame pointer in X1. If the parent
+ // function uses the base register, then the base register is used
+ // directly, and is not retrieved from X1.
+ if (IsFunclet && F.hasPersonalityFn()) {
+ EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+ if (isAsynchronousEHPersonality(Per)) {
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
+ .addReg(AArch64::X1)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(AArch64::X1);
+ }
+ }
+
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = isTargetDarwin(MF)
@@ -1307,15 +1381,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, Reg, StackGrowth - FixedObject));
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
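Both this hunk and the earlier leaf-function hunk switch from the create* CFI helpers to the cfi* ones, which take the offset with the sign that actually appears in the .cfi_* directive instead of the pre-negated value. A minimal sketch of that convention (my reading of the change, not an API excerpt):

    #include <cstdint>

    // The old helpers negated their argument internally; the new ones take
    // the directive value directly. Both encode the same CFA rule.
    int64_t oldDefCfaOffsetArg(int64_t FrameSize) { return -FrameSize; }
    int64_t newDefCfaOffsetArg(int64_t FrameSize) { return FrameSize; }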
@@ -1374,7 +1448,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
- bool IsTailCallReturn = false;
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
@@ -1385,10 +1458,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
- unsigned RetOpcode = MBBI->getOpcode();
- IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri ||
- RetOpcode == AArch64::TCRETURNriBTI;
IsFunclet = isFuncletReturnInstr(*MBBI);
}
@@ -1403,21 +1472,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
- uint64_t ArgumentPopSize = 0;
- if (IsTailCallReturn) {
- MachineOperand &StackAdjust = MBBI->getOperand(1);
-
- // For a tail-call in a callee-pops-arguments environment, some or all of
- // the stack may actually be in use for the call's arguments, this is
- // calculated during LowerCall and consumed here...
- ArgumentPopSize = StackAdjust.getImm();
- } else {
- // ... otherwise the amount to pop is *all* of the argument space,
- // conveniently stored in the MachineFunctionInfo by
- // LowerFormalArguments. This will, of course, be zero for the C calling
- // convention.
- ArgumentPopSize = AFI->getArgumentStackToRestore();
- }
+ uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB);
// The stack frame should be like below,
//
@@ -1450,10 +1505,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- // Var args are accounted for in the containing function, so don't
- // include them for funclets.
- unsigned FixedObject =
- (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -1463,7 +1515,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
@@ -1660,7 +1712,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
return resolveFrameIndexReference(
MF, FI, FrameReg,
/*PreferFP=*/
@@ -1679,7 +1731,9 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+
+ unsigned FixedObject =
+ getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
unsigned FPAdjust = isTargetDarwin(MF)
? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
@@ -1701,7 +1755,7 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
- const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
+ const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
@@ -1713,7 +1767,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
+ Register &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1764,10 +1818,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
- else if (!CanUseBP) { // Can't use BP. Forced to use FP.
- assert(!SVEStackSize && "Expected BP to be available");
+ else if (!CanUseBP) // Can't use BP. Forced to use FP.
UseFP = true;
- }
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
@@ -1933,7 +1985,7 @@ struct RegPairInfo {
} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
- MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+ MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
@@ -2058,8 +2110,8 @@ static void computeCalleeSaveRegisterPairs(
FixupDone = true;
ByteOffset -= 8;
assert(ByteOffset % 16 == 0);
- assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
- MFI.setObjectAlignment(RPI.FrameIdx, 16);
+ assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
+ MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
}
int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
@@ -2078,8 +2130,7 @@ static void computeCalleeSaveRegisterPairs(
bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
bool NeedsWinCFI = needsWinCFI(MF);
@@ -2142,32 +2193,33 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- unsigned Size, Align;
+ unsigned Size;
+ Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR64:
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR128:
StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::ZPR:
StrOpc = AArch64::STR_ZXI;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::PPR:
StrOpc = AArch64::STR_PXI;
Size = 2;
- Align = 2;
+ Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
@@ -2196,7 +2248,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
- MachineMemOperand::MOStore, Size, Align));
+ MachineMemOperand::MOStore, Size, Alignment));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -2204,8 +2256,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
- MachineMemOperand::MOStore, Size, Align));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
@@ -2220,8 +2272,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
@@ -2248,32 +2299,33 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- unsigned Size, Align;
+ unsigned Size;
+ Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR64:
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR128:
LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::ZPR:
LdrOpc = AArch64::LDR_ZXI;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::PPR:
LdrOpc = AArch64::LDR_PXI;
Size = 2;
- Align = 2;
+ Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
@@ -2296,7 +2348,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
- MachineMemOperand::MOLoad, Size, Align));
+ MachineMemOperand::MOLoad, Size, Alignment));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
@@ -2305,7 +2357,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
- MachineMemOperand::MOLoad, Size, Align));
+ MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
@@ -2348,6 +2400,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2396,6 +2449,16 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
+ !Subtarget.isTargetWindows()) {
+ // For Windows calling convention on a non-windows OS, where X18 is treated
+ // as reserved, back up X18 when entering non-windows code (marked with the
+ // Windows calling convention) and restore when returning regardless of
+ // whether the individual function uses it - it might call other functions
+ // that clobber it.
+ SavedRegs.set(AArch64::X18);
+ }
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
unsigned SVECSStackSize = 0;
@@ -2467,8 +2530,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass &RC = AArch64::GPR64RegClass;
unsigned Size = TRI->getSpillSize(RC);
- unsigned Align = TRI->getSpillAlignment(RC);
- int FI = MFI.CreateStackObject(Size, Align, false);
+ Align Alignment = TRI->getSpillAlign(RC);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
RS->addScavengingFrameIndex(FI);
LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
@@ -2549,12 +2612,12 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Make sure to align the last callee save slot.
- MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
+ MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));
// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlignment(I));
+ Offset = alignTo(Offset, MFI.getObjectAlign(I));
if (AssignOffsets)
Assign(I, -Offset);
}
@@ -2576,15 +2639,15 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Allocate all SVE locals and spills
for (unsigned FI : ObjectsToAllocate) {
- unsigned Align = MFI.getObjectAlignment(FI);
+ Align Alignment = MFI.getObjectAlign(FI);
// FIXME: Given that the length of SVE vectors is not necessarily a power of
// two, we'd need to align every object dynamically at runtime if the
// alignment is larger than 16. This is not yet supported.
- if (Align > 16)
+ if (Alignment > Align(16))
report_fatal_error(
"Alignment of scalable vectors > 16 bytes is not yet supported");
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Align);
+ Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
if (AssignOffsets)
Assign(FI, -Offset);
}
@@ -2632,9 +2695,14 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
++MBBI;
// Create an UnwindHelp object.
- int UnwindHelpFI =
- MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
+ // The UnwindHelp object is allocated at the start of the fixed object area
+ int64_t FixedObject =
+ getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
+ int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
+ /*SPOffset*/ -FixedObject,
+ /*IsImmutable=*/false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
+
// We need to store -2 into the UnwindHelp object at the start of the
// function.
DebugLoc DL;
@@ -2649,17 +2717,411 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
.addImm(0);
}
-/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
-/// the update. This is easily retrieved as it is exactly the offset that is set
-/// in processFunctionBeforeFrameFinalized.
+namespace {
+struct TagStoreInstr {
+ MachineInstr *MI;
+ int64_t Offset, Size;
+ explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
+ : MI(MI), Offset(Offset), Size(Size) {}
+};
+
+class TagStoreEdit {
+ MachineFunction *MF;
+ MachineBasicBlock *MBB;
+ MachineRegisterInfo *MRI;
+ // Tag store instructions that are being replaced.
+ SmallVector<TagStoreInstr, 8> TagStores;
+ // Combined memref arguments of the above instructions.
+ SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
+
+ // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
+ // FrameRegOffset + Size) with the address tag of SP.
+ Register FrameReg;
+ StackOffset FrameRegOffset;
+ int64_t Size;
+ // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
+ Optional<int64_t> FrameRegUpdate;
+ // MIFlags for any FrameReg updating instructions.
+ unsigned FrameRegUpdateFlags;
+
+ // Use zeroing instruction variants.
+ bool ZeroData;
+ DebugLoc DL;
+
+ void emitUnrolled(MachineBasicBlock::iterator InsertI);
+ void emitLoop(MachineBasicBlock::iterator InsertI);
+
+public:
+ TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
+ : MBB(MBB), ZeroData(ZeroData) {
+ MF = MBB->getParent();
+ MRI = &MF->getRegInfo();
+ }
+ // Add an instruction to be replaced. Instructions must be added in the
+ // ascending order of Offset, and have to be adjacent.
+ void addInstruction(TagStoreInstr I) {
+ assert((TagStores.empty() ||
+ TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
+ "Non-adjacent tag store instructions.");
+ TagStores.push_back(I);
+ }
+ void clear() { TagStores.clear(); }
+ // Emit equivalent code at the given location, and erase the current set of
+ // instructions. May skip if the replacement is not profitable. May invalidate
+ // the input iterator and replace it with a valid one.
+ void emitCode(MachineBasicBlock::iterator &InsertI,
+ const AArch64FrameLowering *TFI, bool IsLast);
+};
+
+void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
+
+ const int64_t kMinOffset = -256 * 16;
+ const int64_t kMaxOffset = 255 * 16;
+
+ Register BaseReg = FrameReg;
+ int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
+ if (BaseRegOffsetBytes < kMinOffset ||
+ BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
+ Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
+ {BaseRegOffsetBytes, MVT::i8}, TII);
+ BaseReg = ScratchReg;
+ BaseRegOffsetBytes = 0;
+ }
+
+ MachineInstr *LastI = nullptr;
+ while (Size) {
+ int64_t InstrSize = (Size > 16) ? 32 : 16;
+ unsigned Opcode =
+ InstrSize == 16
+ ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
+ : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
+ MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
+ .addReg(AArch64::SP)
+ .addReg(BaseReg)
+ .addImm(BaseRegOffsetBytes / 16)
+ .setMemRefs(CombinedMemRefs);
+ // A store to [BaseReg, #0] should go last for an opportunity to fold the
+ // final SP adjustment in the epilogue.
+ if (BaseRegOffsetBytes == 0)
+ LastI = I;
+ BaseRegOffsetBytes += InstrSize;
+ Size -= InstrSize;
+ }
+
+ if (LastI)
+ MBB->splice(InsertI, MBB, LastI);
+}
+
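The unrolled emission above walks the region in 32-byte ST2G steps with at most one trailing 16-byte STG. A standalone sketch of that plan, printing schematic mnemonics rather than building MachineInstrs:

    #include <cstdint>
    #include <cstdio>

    // Each ST2G tags 32 bytes and each STG 16; the scaled signed 9-bit
    // immediate limits offsets to [-4096, 4080]. A 48-byte region at offset
    // 0 yields "st2g @0" then "stg @32" (the real code then moves the @0
    // store last so a trailing SP bump can fold into it).
    void planUnrolled(int64_t OffsetBytes, int64_t SizeBytes) {
      while (SizeBytes) {
        int64_t InstrSize = (SizeBytes > 16) ? 32 : 16;
        std::printf("%s sp, [base, #%lld]\n",
                    InstrSize == 16 ? "stg" : "st2g",
                    (long long)OffsetBytes);
        OffsetBytes += InstrSize;
        SizeBytes -= InstrSize;
      }
    }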
+void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
+
+ Register BaseReg = FrameRegUpdate
+ ? FrameReg
+ : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+ Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+
+ emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
+
+ int64_t LoopSize = Size;
+ // If the loop size is not a multiple of 32, split off one 16-byte store at
+ // the end to fold BaseReg update into.
+ if (FrameRegUpdate && *FrameRegUpdate)
+ LoopSize -= LoopSize % 32;
+ MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
+ TII->get(ZeroData ? AArch64::STZGloop_wback
+ : AArch64::STGloop_wback))
+ .addDef(SizeReg)
+ .addDef(BaseReg)
+ .addImm(LoopSize)
+ .addReg(BaseReg)
+ .setMemRefs(CombinedMemRefs);
+ if (FrameRegUpdate)
+ LoopI->setFlags(FrameRegUpdateFlags);
+
+ int64_t ExtraBaseRegUpdate =
+ FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
+ if (LoopSize < Size) {
+ assert(FrameRegUpdate);
+ assert(Size - LoopSize == 16);
+ // Tag 16 more bytes at BaseReg and update BaseReg.
+ BuildMI(*MBB, InsertI, DL,
+ TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
+ .addDef(BaseReg)
+ .addReg(BaseReg)
+ .addReg(BaseReg)
+ .addImm(1 + ExtraBaseRegUpdate / 16)
+ .setMemRefs(CombinedMemRefs)
+ .setMIFlags(FrameRegUpdateFlags);
+ } else if (ExtraBaseRegUpdate) {
+ // Update BaseReg.
+ BuildMI(
+ *MBB, InsertI, DL,
+ TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
+ .addDef(BaseReg)
+ .addReg(BaseReg)
+ .addImm(std::abs(ExtraBaseRegUpdate))
+ .addImm(0)
+ .setMIFlags(FrameRegUpdateFlags);
+ }
+}
+
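When a nonzero base-register update is folded into the loop, a 16-byte tail is peeled off so it can be emitted as a post-indexed STG that also carries the extra adjustment. A standalone model of that split (hypothetical names):

    #include <cstdint>

    struct LoopPlan { int64_t LoopBytes, TailBytes; };

    // E.g. Size == 112 with a folded update tags 96 bytes in STGloop_wback
    // and the final 16 in one post-indexed STG/STZG.
    LoopPlan planLoop(int64_t Size, bool FoldBaseUpdate) {
      int64_t LoopBytes = Size;
      if (FoldBaseUpdate)
        LoopBytes -= LoopBytes % 32;
      return {LoopBytes, Size - LoopBytes};
    }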
+// Check if *II is a register update that can be merged into STGloop that ends
+// at (Reg + Size). *TotalOffset is set to the required adjustment to Reg
+// after the end of the loop.
+bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
+ int64_t Size, int64_t *TotalOffset) {
+ MachineInstr &MI = *II;
+ if ((MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::SUBXri) &&
+ MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
+ unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
+ int64_t Offset = MI.getOperand(2).getImm() << Shift;
+ if (MI.getOpcode() == AArch64::SUBXri)
+ Offset = -Offset;
+ int64_t AbsPostOffset = std::abs(Offset - Size);
+ const int64_t kMaxOffset =
+ 0xFFF; // Max encoding for unshifted ADDXri / SUBXri
+ if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
+ *TotalOffset = Offset;
+ return true;
+ }
+ }
+ return false;
+}
+
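The mergeability test above reduces to simple arithmetic. A standalone sketch with a worked case: a 256-byte STGloop followed by "add sp, sp, #272" leaves |272 - 256| == 16, which is 16-aligned and fits the unshifted 12-bit immediate, so the update folds.

    #include <cstdint>
    #include <cstdlib>

    // UpdateImm is the signed ADD/SUB immediate; TaggedSize is the number
    // of bytes the loop already advances the base register by.
    bool canFoldBaseUpdate(int64_t UpdateImm, int64_t TaggedSize) {
      int64_t Post = std::abs(UpdateImm - TaggedSize);
      return Post <= 0xFFF && Post % 16 == 0;
    }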
+void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs) {
+ MemRefs.clear();
+ for (auto &TS : TSE) {
+ MachineInstr *MI = TS.MI;
+ // An instruction without memory operands may access anything. Be
+ // conservative and return an empty list.
+ if (MI->memoperands_empty()) {
+ MemRefs.clear();
+ return;
+ }
+ MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
+ }
+}
+
+void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
+ const AArch64FrameLowering *TFI, bool IsLast) {
+ if (TagStores.empty())
+ return;
+ TagStoreInstr &FirstTagStore = TagStores[0];
+ TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
+ Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
+ DL = TagStores[0].MI->getDebugLoc();
+
+ Register Reg;
+ FrameRegOffset = TFI->resolveFrameOffsetReference(
+ *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
+ /*PreferFP=*/false, /*ForSimm=*/true);
+ FrameReg = Reg;
+ FrameRegUpdate = None;
+
+ mergeMemRefs(TagStores, CombinedMemRefs);
+
+ LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
+ for (const auto &Instr
+ : TagStores) { dbgs() << " " << *Instr.MI; });
+
+ // Size threshold where a loop becomes shorter than a linear sequence of
+ // tagging instructions.
+ const int kSetTagLoopThreshold = 176;
+ if (Size < kSetTagLoopThreshold) {
+ if (TagStores.size() < 2)
+ return;
+ emitUnrolled(InsertI);
+ } else {
+ MachineInstr *UpdateInstr = nullptr;
+ int64_t TotalOffset;
+ if (IsLast) {
+ // See if we can merge base register update into the STGloop.
+ // This is done in AArch64LoadStoreOptimizer for "normal" stores,
+ // but STGloop is way too unusual for that, and also it only
+ // realistically happens in function epilogue. Also, STGloop is expanded
+ // before that pass.
+ if (InsertI != MBB->end() &&
+ canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
+ &TotalOffset)) {
+ UpdateInstr = &*InsertI++;
+ LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
+ << *UpdateInstr);
+ }
+ }
+
+ if (!UpdateInstr && TagStores.size() < 2)
+ return;
+
+ if (UpdateInstr) {
+ FrameRegUpdate = TotalOffset;
+ FrameRegUpdateFlags = UpdateInstr->getFlags();
+ }
+ emitLoop(InsertI);
+ if (UpdateInstr)
+ UpdateInstr->eraseFromParent();
+ }
+
+ for (auto &TS : TagStores)
+ TS.MI->eraseFromParent();
+}
+
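The 176-byte threshold above is where the loop form becomes shorter than the unrolled STG/ST2G run. A compact standalone model of the strategy selection (a bool abstracts the foldable SP update):

    #include <cstdint>

    enum class TagPlan { Keep, Unrolled, Loop };

    // A single instruction is left untouched unless switching to the loop
    // form lets an SP update be folded in.
    TagPlan pickPlan(int64_t Size, unsigned NumStores, bool FoldableUpdate) {
      if (Size < 176) // kSetTagLoopThreshold
        return NumStores < 2 ? TagPlan::Keep : TagPlan::Unrolled;
      return (NumStores < 2 && !FoldableUpdate) ? TagPlan::Keep
                                                : TagPlan::Loop;
    }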
+bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
+ int64_t &Size, bool &ZeroData) {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ unsigned Opcode = MI.getOpcode();
+ ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
+ Opcode == AArch64::STZ2GOffset);
+
+ if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
+ if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
+ return false;
+ if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
+ return false;
+ Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
+ Size = MI.getOperand(2).getImm();
+ return true;
+ }
+
+ if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
+ Size = 16;
+ else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
+ Size = 32;
+ else
+ return false;
+
+ if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
+ return false;
+
+ Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
+ 16 * MI.getOperand(2).getImm();
+ return true;
+}
+
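The recognizer above recovers a (start offset, size) pair for both instruction forms. A standalone sketch of the offset math (hypothetical TagOp enum in place of the real opcodes):

    #include <cstdint>

    enum class TagOp { Loop, STG, ST2G }; // STZ variants behave the same

    struct TagRegion { int64_t Offset, Size; };

    // Loop forms carry an explicit byte size and a bare frame index; fixed
    // forms tag 16 or 32 bytes at FI offset + 16 * scaled immediate.
    TagRegion tagRegion(TagOp Op, int64_t ObjectOffset, int64_t Imm) {
      if (Op == TagOp::Loop)
        return {ObjectOffset, Imm}; // Imm is the size operand here
      int64_t Size = (Op == TagOp::ST2G) ? 32 : 16;
      return {ObjectOffset + 16 * Imm, Size};
    }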
+// Detect a run of memory tagging instructions for adjacent stack frame slots,
+// and replace them with a shorter instruction sequence:
+// * replace STG + STG with ST2G
+// * replace STGloop + STGloop with STGloop
+// This code needs to run when stack slot offsets are already known, but before
+// FrameIndex operands in STG instructions are eliminated.
+MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
+ const AArch64FrameLowering *TFI,
+ RegScavenger *RS) {
+ bool FirstZeroData;
+ int64_t Size, Offset;
+ MachineInstr &MI = *II;
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator NextI = ++II;
+ if (&MI == &MBB->instr_back())
+ return II;
+ if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
+ return II;
+
+ SmallVector<TagStoreInstr, 4> Instrs;
+ Instrs.emplace_back(&MI, Offset, Size);
+
+ constexpr int kScanLimit = 10;
+ int Count = 0;
+ for (MachineBasicBlock::iterator E = MBB->end();
+ NextI != E && Count < kScanLimit; ++NextI) {
+ MachineInstr &MI = *NextI;
+ bool ZeroData;
+ int64_t Size, Offset;
+ // Collect instructions that update memory tags with a FrameIndex operand
+ // and (when applicable) constant size, and whose output registers are dead
+ // (the latter is almost always the case in practice). Since these
+ // instructions effectively have no inputs or outputs, we are free to skip
+ // any non-aliasing instructions in between without tracking used registers.
+ if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
+ if (ZeroData != FirstZeroData)
+ break;
+ Instrs.emplace_back(&MI, Offset, Size);
+ continue;
+ }
+
+ // Only count non-transient, non-tagging instructions toward the scan
+ // limit.
+ if (!MI.isTransient())
+ ++Count;
+
+ // Just in case, stop before the epilogue code starts.
+ if (MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy))
+ break;
+
+ // Reject anything that may alias the collected instructions.
+ if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
+ break;
+ }
+
+ // New code will be inserted after the last tagging instruction we've found.
+ MachineBasicBlock::iterator InsertI = Instrs.back().MI;
+ InsertI++;
+
+ llvm::stable_sort(Instrs,
+ [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
+ return Left.Offset < Right.Offset;
+ });
+
+ // Make sure that we don't have any overlapping stores.
+ int64_t CurOffset = Instrs[0].Offset;
+ for (auto &Instr : Instrs) {
+ if (CurOffset > Instr.Offset)
+ return NextI;
+ CurOffset = Instr.Offset + Instr.Size;
+ }
+
+ // Find contiguous runs of tagged memory and emit shorter instruction
+  // sequences for them when possible.
+ TagStoreEdit TSE(MBB, FirstZeroData);
+ Optional<int64_t> EndOffset;
+ for (auto &Instr : Instrs) {
+ if (EndOffset && *EndOffset != Instr.Offset) {
+ // Found a gap.
+ TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
+ TSE.clear();
+ }
+
+ TSE.addInstruction(Instr);
+ EndOffset = Instr.Offset + Instr.Size;
+ }
+
+ TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
+
+ return InsertI;
+}
+} // namespace
+
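End to end, the pass sorts the collected tag stores by offset, rejects overlap, and batches contiguous runs, splitting at gaps; each batch then goes through TagStoreEdit::emitCode. A standalone sketch of that grouping, under which two adjacent 16-byte STGs become one 32-byte run (emitted as a single ST2G by the real code):

    #include <cstdint>
    #include <vector>

    struct Store { int64_t Offset, Size; };

    // Input must be sorted by Offset and overlap-free, as checked above.
    std::vector<Store> batchContiguousRuns(const std::vector<Store> &Sorted) {
      std::vector<Store> Runs;
      for (const Store &S : Sorted) {
        if (!Runs.empty() &&
            Runs.back().Offset + Runs.back().Size == S.Offset)
          Runs.back().Size += S.Size; // extend the current run
        else
          Runs.push_back(S); // gap found: start a new run
      }
      return Runs;
    }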
+void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS = nullptr) const {
+ if (StackTaggingMergeSetTag)
+ for (auto &BB : MF)
+ for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
+ II = tryMergeAdjacentSTG(II, this, RS);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
+/// before the update. This is easily retrieved as it is exactly the offset
+/// that is set in processFunctionBeforeFrameFinalized.
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
- const MachineFunction &MF, int FI, unsigned &FrameReg,
+ const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
- << MFI.getObjectOffset(FI) << "\n");
- FrameReg = AArch64::SP;
- return MFI.getObjectOffset(FI);
+ if (IgnoreSPUpdates) {
+ LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+ << MFI.getObjectOffset(FI) << "\n");
+ FrameReg = AArch64::SP;
+ return MFI.getObjectOffset(FI);
+ }
+
+ return getFrameIndexReference(MF, FI, FrameReg);
}
/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
@@ -2678,5 +3140,5 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
// This is the amount of stack a funclet needs to allocate.
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
- getStackAlignment());
+ getStackAlign());
}