summaryrefslogtreecommitdiff
path: root/lib/Target/AArch64/AArch64FrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp904
1 files changed, 767 insertions, 137 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 6dc5d19862a94..538a8d7e8fbcf 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -98,6 +98,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -114,11 +115,13 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ // Win64 EH requires a frame pointer if funclets are present, as the locals
+ // are accessed off the frame pointer in both the parent function and the
+ // funclets.
+ if (MF.hasEHFunclets())
+ return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
@@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
+static bool ShouldSignReturnAddress(MachineFunction &MF) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+ // - sign-return-address=non-leaf and the functions spills the LR
+
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address"))
+ return false;
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return false;
+
+ if (Scope.equals("all"))
+ return true;
+
+ assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
+
+ for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
+ if (Info.getReg() == AArch64::LR)
+ return true;
+
+ return false;
+}
+
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
@@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+// Given a load or a store instruction, generate an appropriate unwinding SEH
+// code on Windows.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ int Imm = MBBI->getOperand(ImmIdx).getImm();
+ MachineInstrBuilder MIB;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("No SEH Opcode for this instruction");
+ case AArch64::LDPDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPDpre: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDPXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPXpre: {
+ unsigned Reg0 = MBBI->getOperand(1).getReg();
+ unsigned Reg1 = MBBI->getOperand(2).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRDpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRXpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPDi:
+ case AArch64::LDPDi: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPXi:
+ case AArch64::LDPXi: {
+ unsigned Reg0 = MBBI->getOperand(0).getReg();
+ unsigned Reg1 = MBBI->getOperand(1).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRXui:
+ case AArch64::LDRXui: {
+ int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRDui:
+ case AArch64::LDRDui: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ }
+ auto I = MBB->insertAfter(MBBI, MIB);
+ return I;
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+ unsigned LocalStackSize) {
+ MachineOperand *ImmOpnd = nullptr;
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Fix the offset in the SEH instruction");
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFReg:
+ ImmOpnd = &MBBI->getOperand(ImmIdx);
+ break;
+ }
+ if (ImmOpnd)
+ ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
+
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool InProlog = true) {
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
+ // instructions and associated CFI instruction.
while (MBBI->getOpcode() == AArch64::STRXpost ||
- MBBI->getOpcode() == AArch64::LDRXpre) {
- assert(MBBI->getOperand(0).getReg() != AArch64::SP);
+ MBBI->getOpcode() == AArch64::LDRXpre ||
+ MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
+ if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
+ assert(MBBI->getOperand(0).getReg() != AArch64::SP);
++MBBI;
}
-
unsigned NewOpc;
- bool NewIsUnscaled = false;
+ int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
+ Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
- NewIsUnscaled = true;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
- NewIsUnscaled = true;
+ break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
+ Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
+ Scale = 8;
+ break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
- NewIsUnscaled = true;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
- NewIsUnscaled = true;
+ break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
break;
}
+ // Get rid of the SEH code associated with the old instruction.
+ if (NeedsWinCFI) {
+ auto SEH = std::next(MBBI);
+ if (AArch64InstrInfo::isSEHInstruction(*SEH))
+ SEH->eraseFromParent();
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
@@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
- // Last operand is immediate offset that needs fixing.
- assert(CSStackSizeInc % 8 == 0);
- int64_t CSStackSizeIncImm = CSStackSizeInc;
- if (!NewIsUnscaled)
- CSStackSizeIncImm /= 8;
- MIB.addImm(CSStackSizeIncImm);
+ assert(CSStackSizeInc % Scale == 0);
+ MIB.addImm(CSStackSizeInc / Scale);
MIB.setMIFlags(MBBI->getFlags());
- MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
+ MIB.setMemRefs(MBBI->memoperands());
+
+ // Generate a new SEH code that corresponds to the new instruction.
+ if (NeedsWinCFI)
+ InsertSEH(*MIB, *TII,
+ InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
return std::prev(MBB.erase(MBBI));
}
@@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- unsigned LocalStackSize) {
+ unsigned LocalStackSize,
+ bool NeedsWinCFI) {
+ if (AArch64InstrInfo::isSEHInstruction(MI))
+ return;
+
unsigned Opc = MI.getOpcode();
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
- if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
- assert(MI.getOperand(0).getReg() != AArch64::SP);
+ // instructions and associated CFI instruction.
+ if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
+ Opc == AArch64::CFI_INSTRUCTION) {
+ if (Opc != AArch64::CFI_INSTRUCTION)
+ assert(MI.getOperand(0).getReg() != AArch64::SP);
return;
}
- (void)Opc;
- assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
- Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
- Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
- Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
- "Unexpected callee-save save/restore opcode!");
+ unsigned Scale;
+ switch (Opc) {
+ case AArch64::STPXi:
+ case AArch64::STRXui:
+ case AArch64::STPDi:
+ case AArch64::STRDui:
+ case AArch64::LDPXi:
+ case AArch64::LDRXui:
+ case AArch64::LDPDi:
+ case AArch64::LDRDui:
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ }
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
@@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
- assert(LocalStackSize % 8 == 0);
- OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+ assert(LocalStackSize % Scale == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+ if (NeedsWinCFI) {
+ auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+ assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+ assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+ "Expecting a SEH instruction");
+ fixupSEHOpcode(MBBI, LocalStackSize);
+ }
}
static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
//
}
+static bool ShouldSignWithAKey(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address-key"))
+ return true;
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("a_key");
+}
+
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
+ bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
+ bool NeedsWinCFI = needsWinCFI(MF);
+ MF.setHasWinCFI(NeedsWinCFI);
+ bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases, the function doesn't have a redzone so let's
@@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ if (ShouldSignReturnAddress(MF)) {
+ if (ShouldSignWithAKey(MF))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- int NumBytes = (int)MFI.getStackSize();
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
-
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
-
if (!NumBytes)
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
@@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
+ if (!NeedsWinCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
return;
}
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject = (IsWin64 && !IsFunclet) ?
+ alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
- -PrologueSaveSize);
+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
++MBBI;
}
+
+ // The code below is not applicable to funclets. We have emitted all the SEH
+ // opcodes that we needed to emit. The FP and BP belong to the containing
+ // function.
+ if (IsFunclet) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ return;
+ }
+
if (HasFP) {
// Only set up FP if we actually need to. Frame pointer is fp =
// sp - fixedobject - 16.
@@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ follwed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
}
@@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
NumBytes = 0;
}
@@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes & andMaskEncoded)
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
}
+ // The very last FrameSetup instruction indicates the end of prologue. Emit a
+ // SEH opcode indicating the prologue end.
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = -TD.getPointerSize(0);
@@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
+static void InsertReturnAddressAuth(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (!ShouldSignReturnAddress(MF))
+ return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // The AUTIASP instruction assembles to a hint instruction before v8.3a so
+ // this instruction can safely used for any v8a architecture.
+ // From v8.3a onwards there are optimised authenticate LR and return
+ // instructions, namely RETA{A,B}, that can be used instead.
+ if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
+ MBBI->getOpcode() == AArch64::RET_ReallyLR) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
+ .copyImplicitOps(*MBBI);
+ MBB.erase(MBBI);
+ } else {
+ BuildMI(
+ MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+static bool isFuncletReturnInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::CATCHRET:
+ case AArch64::CLEANUPRET:
+ return true;
+ }
+}
+
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
+ bool NeedsWinCFI = needsWinCFI(MF);
+ bool IsFunclet = false;
+
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri;
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ IsFunclet = isFuncletReturnInstr(*MBBI);
}
- int NumBytes = MFI.getStackSize();
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
+ auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
+
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject =
+ (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // We cannot rely on the local stack size set in emitPrologue if the function
+ // has funclets, as funclets have different local stack size requirements, and
+ // the current value set in emitPrologue may be that of the containing
+ // function.
+ if (MF.hasEHFunclets())
+ AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+ while (AArch64InstrInfo::isSEHInstruction(*Pop))
+ Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0, convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0) {
- convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
- PrologueSaveSize);
- } else {
+ if (OffsetOp.getImm() == 0)
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+ else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transfering the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
@@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
++LastPopI;
break;
} else if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + AfterCSRPopSize, TII,
- MachineInstr::FrameDestroy);
+ NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
}
@@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
- if (Done)
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
+ if (Done) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
+ }
NumBytes = 0;
}
@@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+ AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
UseFP = true;
+ } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
+ // Funclets access the locals contained in the parent's stack frame
+ // via the frame pointer, so we have to use the FP in the parent
+ // function.
+ assert(
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
+ "Funclets should only be present on Win64");
+ UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
@@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
+static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI) {
+ // If we are generating register pairs for a Windows function that requires
+ // EH support, then pair consecutive registers only. There are no unwind
+ // opcodes for saves/restores of non-consectuve register pairs.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x.
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+
+ // TODO: LR can be paired with any register. We don't support this yet in
+ // the MCLayer. We need to add support for the save_lrpair unwind code.
+ if (!NeedsWinCFI)
+ return false;
+ if (Reg2 == Reg1 + 1)
+ return false;
+ return true;
+}
+
namespace {
struct RegPairInfo {
@@ -1143,7 +1597,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- bool IsGPR;
+ enum RegType { GPR, FPR64, FPR128 } Type;
RegPairInfo() = default;
@@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs(
if (CSI.empty())
return;
+ bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
-
+ // On Linux, we will have either one or zero non-paired register. On Windows
+ // with CFI, we can have multiple unpaired registers in order to utilize the
+ // available unwind codes. This flag assures that the alignment fixup is done
+ // only once, as intened.
+ bool FixupDone = false;
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
- assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
- AArch64::FPR64RegClass.contains(RPI.Reg1));
- RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+ if (AArch64::GPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::GPR;
+ else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR64;
+ else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR128;
+ else
+ llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) {
unsigned NextReg = CSI[i + 1].getReg();
- if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
- (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
- RPI.Reg2 = NextReg;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ if (AArch64::GPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR64:
+ if (AArch64::FPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR128:
+ if (AArch64::FPR128RegClass.contains(NextReg))
+ RPI.Reg2 = NextReg;
+ break;
+ }
}
// If either of the registers to be saved is the lr register, it means that
// we also need to save lr in the shadow call stack.
if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
report_fatal_error("Must reserve x18 to use shadow call stack");
NeedShadowCallStackProlog = true;
}
@@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
- // Round up size of non-pair to pair size if we need to pad the
- // callee-save area to ensure 16-byte alignment.
- Offset -= 16;
+ int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
+ Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
+ RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ FixupDone = true;
+ Offset -= 8;
+ assert(Offset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
- AFI->setCalleeSaveStackHasFreeSpace(true);
- } else
- Offset -= RPI.isPaired() ? 16 : 8;
- assert(Offset % 8 == 0);
- RPI.Offset = Offset / 8;
+ }
+
+ assert(Offset % Scale == 0);
+ RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
@@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
@@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
}
@@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- if (RPI.IsGPR)
- StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
- else
- StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+ "Windows unwdinding requires a consecutive (FP,LR) pair");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will save (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
@@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOStore, Size, Align));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale],
+ // where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+
}
return true;
}
@@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
+ bool NeedsWinCFI = needsWinCFI(MF);
if (MI != MBB.end())
DL = MI->getDebugLoc();
@@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- if (RPI.IsGPR)
- LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
- else
- LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will save (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOLoad, Size, Align));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale]
+ // where factor*scale is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOLoad, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
-
if (ReverseCSRRestoreSeq)
for (const RegPairInfo &RPI : reverse(RegPairs))
EmitMI(RPI);
@@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
- unsigned SpillEstimate = SavedRegs.count();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- unsigned PairedReg = CSRegs[i ^ 1];
- if (Reg == BasePointerReg)
- SpillEstimate++;
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
- SpillEstimate++;
- }
- SpillEstimate += 2; // Conservatively include FP+LR in the estimate
- unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
- SavedRegs.set(AArch64::FP);
- SavedRegs.set(AArch64::LR);
- }
-
unsigned ExtraCSSpill = 0;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
@@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Calculates the callee saved stack size.
+ unsigned CSStackSize = 0;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned Reg : SavedRegs.set_bits())
+ CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+
+ // Save number of saved regs, so we can easily update CSStackSize later.
+ unsigned NumSavedRegs = SavedRegs.count();
+
+ // The frame record needs to be created by saving the appropriate registers
+ unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ if (hasFP(MF) ||
+ windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
+ }
+
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
@@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumRegsSpilled = SavedRegs.count();
- bool CanEliminateFrame = NumRegsSpilled == 0;
+ bool CanEliminateFrame = SavedRegs.count() == 0;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (CFSize > EstimatedStackSizeLimit);
+ bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPRPaired;
- NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Adding the size of additional 64bit GPR saves.
+ CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+ unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
+ << EstimatedStackSize + AlignedCSStackSize
+ << " bytes.\n");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
- AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+ AFI->setCalleeSavedStackSize(AlignedCSStackSize);
+ AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
+
+void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ if (!MF.hasEHFunclets())
+ return;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
+
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ if (MBBI->isTerminator())
+ return;
+
+ // Create an UnwindHelp object.
+ int UnwindHelpFI =
+ MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
+ EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
+ // We need to store -2 into the UnwindHelp object at the start of the
+ // function.
+ DebugLoc DL;
+ RS->enterBasicBlock(MBB);
+ unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
+ .addReg(DstReg, getKillRegState(true))
+ .addFrameIndex(UnwindHelpFI)
+ .addImm(0);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
+int AArch64FrameLowering::getFrameIndexReferencePreferSP(
+ const MachineFunction &MF, int FI, unsigned &FrameReg,
+ bool IgnoreSPUpdates) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+ << MFI.getObjectOffset(FI) << "\n");
+ FrameReg = AArch64::SP;
+ return MFI.getObjectOffset(FI);
+}
+
+/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
+/// the parent's frame pointer
+unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
+ const MachineFunction &MF) const {
+ return 0;
+}
+
+/// Funclets only need to account for space for the callee saved registers,
+/// as the locals are accounted for in the parent's stack frame.
+unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
+ const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
+ // This is the amount of stack a funclet needs to allocate.
+ return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
+ getStackAlignment());
+}