diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Target/AArch64/AArch64FrameLowering.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
Notes
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
| -rw-r--r-- | lib/Target/AArch64/AArch64FrameLowering.cpp | 902 |
1 files changed, 766 insertions, 136 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 6dc5d19862a9..538a8d7e8fbc 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -98,6 +98,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -114,11 +115,13 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + // Win64 EH requires a frame pointer if funclets are present, as the locals + // are accessed off the frame pointer in both the parent function and the + // funclets. + if (MF.hasEHFunclets()) + return true; // Retain behavior of always omitting the FP for leaf functions when possible. if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF)) return true; @@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } +static bool ShouldSignReturnAddress(MachineFunction &MF) { + // The function should be signed in the following situations: + // - sign-return-address=all + // - sign-return-address=non-leaf and the functions spills the LR + + const Function &F = MF.getFunction(); + if (!F.hasFnAttribute("sign-return-address")) + return false; + + StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); + if (Scope.equals("none")) + return false; + + if (Scope.equals("all")) + return true; + + assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf"); + + for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo()) + if (Info.getReg() == AArch64::LR) + return true; + + return false; +} + void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { MachineFunction &MF = *MBB.getParent(); @@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { LiveRegs.addLiveIns(*MBB); // Mark callee saved registers as used so we will not choose them. - const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); + const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); @@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( return true; } +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. +static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, + const TargetInstrInfo &TII, + MachineInstr::MIFlag Flag) { + unsigned Opc = MBBI->getOpcode(); + MachineBasicBlock *MBB = MBBI->getParent(); + MachineFunction &MF = *MBB->getParent(); + DebugLoc DL = MBBI->getDebugLoc(); + unsigned ImmIdx = MBBI->getNumOperands() - 1; + int Imm = MBBI->getOperand(ImmIdx).getImm(); + MachineInstrBuilder MIB; + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + + switch (Opc) { + default: + llvm_unreachable("No SEH Opcode for this instruction"); + case AArch64::LDPDpost: + Imm = -Imm; + LLVM_FALLTHROUGH; + case AArch64::STPDpre: { + unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) + .addImm(Reg0) + .addImm(Reg1) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::LDPXpost: + Imm = -Imm; + LLVM_FALLTHROUGH; + case AArch64::STPXpre: { + unsigned Reg0 = MBBI->getOperand(1).getReg(); + unsigned Reg1 = MBBI->getOperand(2).getReg(); + if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) + .addImm(Imm * 8) + .setMIFlag(Flag); + else + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) + .addImm(RegInfo->getSEHRegNum(Reg0)) + .addImm(RegInfo->getSEHRegNum(Reg1)) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::LDRDpost: + Imm = -Imm; + LLVM_FALLTHROUGH; + case AArch64::STRDpre: { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + case AArch64::LDRXpost: + Imm = -Imm; + LLVM_FALLTHROUGH; + case AArch64::STRXpre: { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + case AArch64::STPDi: + case AArch64::LDPDi: { + unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); + unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) + .addImm(Reg0) + .addImm(Reg1) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::STPXi: + case AArch64::LDPXi: { + unsigned Reg0 = MBBI->getOperand(0).getReg(); + unsigned Reg1 = MBBI->getOperand(1).getReg(); + if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) + .addImm(Imm * 8) + .setMIFlag(Flag); + else + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) + .addImm(RegInfo->getSEHRegNum(Reg0)) + .addImm(RegInfo->getSEHRegNum(Reg1)) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::STRXui: + case AArch64::LDRXui: { + int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) + .addImm(Reg) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::STRDui: + case AArch64::LDRDui: { + unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg)) + .addImm(Reg) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + } + auto I = MBB->insertAfter(MBBI, MIB); + return I; +} + +// Fix up the SEH opcode associated with the save/restore instruction. +static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, + unsigned LocalStackSize) { + MachineOperand *ImmOpnd = nullptr; + unsigned ImmIdx = MBBI->getNumOperands() - 1; + switch (MBBI->getOpcode()) { + default: + llvm_unreachable("Fix the offset in the SEH instruction"); + case AArch64::SEH_SaveFPLR: + case AArch64::SEH_SaveRegP: + case AArch64::SEH_SaveReg: + case AArch64::SEH_SaveFRegP: + case AArch64::SEH_SaveFReg: + ImmOpnd = &MBBI->getOperand(ImmIdx); + break; + } + if (ImmOpnd) + ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); +} + // Convert callee-save register save/restore instruction to do stack pointer // decrement/increment to allocate/deallocate the callee-save stack area by // converting store/load to use pre/post increment version. static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { + const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, + bool NeedsWinCFI, bool InProlog = true) { // Ignore instructions that do not operate on SP, i.e. shadow call stack - // instructions. + // instructions and associated CFI instruction. while (MBBI->getOpcode() == AArch64::STRXpost || - MBBI->getOpcode() == AArch64::LDRXpre) { - assert(MBBI->getOperand(0).getReg() != AArch64::SP); + MBBI->getOpcode() == AArch64::LDRXpre || + MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) { + if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION) + assert(MBBI->getOperand(0).getReg() != AArch64::SP); ++MBBI; } - unsigned NewOpc; - bool NewIsUnscaled = false; + int Scale = 1; switch (MBBI->getOpcode()) { default: llvm_unreachable("Unexpected callee-save save/restore opcode!"); case AArch64::STPXi: NewOpc = AArch64::STPXpre; + Scale = 8; break; case AArch64::STPDi: NewOpc = AArch64::STPDpre; + Scale = 8; + break; + case AArch64::STPQi: + NewOpc = AArch64::STPQpre; + Scale = 16; break; case AArch64::STRXui: NewOpc = AArch64::STRXpre; - NewIsUnscaled = true; break; case AArch64::STRDui: NewOpc = AArch64::STRDpre; - NewIsUnscaled = true; + break; + case AArch64::STRQui: + NewOpc = AArch64::STRQpre; break; case AArch64::LDPXi: NewOpc = AArch64::LDPXpost; + Scale = 8; break; case AArch64::LDPDi: NewOpc = AArch64::LDPDpost; + Scale = 8; + break; + case AArch64::LDPQi: + NewOpc = AArch64::LDPQpost; + Scale = 16; break; case AArch64::LDRXui: NewOpc = AArch64::LDRXpost; - NewIsUnscaled = true; break; case AArch64::LDRDui: NewOpc = AArch64::LDRDpost; - NewIsUnscaled = true; + break; + case AArch64::LDRQui: + NewOpc = AArch64::LDRQpost; break; } + // Get rid of the SEH code associated with the old instruction. + if (NeedsWinCFI) { + auto SEH = std::next(MBBI); + if (AArch64InstrInfo::isSEHInstruction(*SEH)) + SEH->eraseFromParent(); + } MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); MIB.addReg(AArch64::SP, RegState::Define); @@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( "instruction!"); assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && "Unexpected base register in callee-save save/restore instruction!"); - // Last operand is immediate offset that needs fixing. - assert(CSStackSizeInc % 8 == 0); - int64_t CSStackSizeIncImm = CSStackSizeInc; - if (!NewIsUnscaled) - CSStackSizeIncImm /= 8; - MIB.addImm(CSStackSizeIncImm); + assert(CSStackSizeInc % Scale == 0); + MIB.addImm(CSStackSizeInc / Scale); MIB.setMIFlags(MBBI->getFlags()); - MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end()); + MIB.setMemRefs(MBBI->memoperands()); + + // Generate a new SEH code that corresponds to the new instruction. + if (NeedsWinCFI) + InsertSEH(*MIB, *TII, + InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy); return std::prev(MBB.erase(MBBI)); } @@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, - unsigned LocalStackSize) { + unsigned LocalStackSize, + bool NeedsWinCFI) { + if (AArch64InstrInfo::isSEHInstruction(MI)) + return; + unsigned Opc = MI.getOpcode(); // Ignore instructions that do not operate on SP, i.e. shadow call stack - // instructions. - if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) { - assert(MI.getOperand(0).getReg() != AArch64::SP); + // instructions and associated CFI instruction. + if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre || + Opc == AArch64::CFI_INSTRUCTION) { + if (Opc != AArch64::CFI_INSTRUCTION) + assert(MI.getOperand(0).getReg() != AArch64::SP); return; } - (void)Opc; - assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi || - Opc == AArch64::STRXui || Opc == AArch64::STRDui || - Opc == AArch64::LDPXi || Opc == AArch64::LDPDi || - Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) && - "Unexpected callee-save save/restore opcode!"); + unsigned Scale; + switch (Opc) { + case AArch64::STPXi: + case AArch64::STRXui: + case AArch64::STPDi: + case AArch64::STRDui: + case AArch64::LDPXi: + case AArch64::LDRXui: + case AArch64::LDPDi: + case AArch64::LDRDui: + Scale = 8; + break; + case AArch64::STPQi: + case AArch64::STRQui: + case AArch64::LDPQi: + case AArch64::LDRQui: + Scale = 16; + break; + default: + llvm_unreachable("Unexpected callee-save save/restore opcode!"); + } unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && @@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, // Last operand is immediate offset that needs fixing. MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); // All generated opcodes have scaled offsets. - assert(LocalStackSize % 8 == 0); - OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); + assert(LocalStackSize % Scale == 0); + OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale); + + if (NeedsWinCFI) { + auto MBBI = std::next(MachineBasicBlock::iterator(MI)); + assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); + assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && + "Expecting a SEH instruction"); + fixupSEHOpcode(MBBI, LocalStackSize); + } } static void adaptForLdStOpt(MachineBasicBlock &MBB, @@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB, // } +static bool ShouldSignWithAKey(MachineFunction &MF) { + const Function &F = MF.getFunction(); + if (!F.hasFnAttribute("sign-return-address-key")) + return true; + + const StringRef Key = + F.getFnAttribute("sign-return-address-key").getValueAsString(); + assert(Key.equals_lower("a_key") || Key.equals_lower("b_key")); + return Key.equals_lower("a_key"); +} + +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); + bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) && + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool HasFP = hasFP(MF); + bool NeedsWinCFI = needsWinCFI(MF); + MF.setHasWinCFI(NeedsWinCFI); + bool IsFunclet = MBB.isEHFuncletEntry(); // At this point, we're going to decide whether or not the function uses a // redzone. In most cases, the function doesn't have a redzone so let's @@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc DL; + if (ShouldSignReturnAddress(MF)) { + if (ShouldSignWithAKey(MF)) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup); + else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP)) + .setMIFlag(MachineInstr::FrameSetup); + } + + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; - int NumBytes = (int)MFI.getStackSize(); + // getStackSize() includes all the locals in its size calculation. We don't + // include these locals when computing the stack size of a funclet, as they + // are allocated in the parent's stack frame and accessed via the frame + // pointer from the funclet. We only save the callee saved registers in the + // funclet, which are really the callee saved registers of the parent + // function, including the funclet. + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); - // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); - if (!NumBytes) return; // REDZONE: If the stack size is less than 128 bytes, we don't need @@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - - // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); - // Encode the stack size of the leaf function. - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); + if (!NeedsWinCFI) { + // Label used to tie together the PROLOG_LABEL and the MachineMoves. + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + // Encode the stack size of the leaf function. + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } } + + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + return; } bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + // Var args are accounted for in the containing function, so don't + // include them for funclets. + unsigned FixedObject = (IsWin64 && !IsFunclet) ? + alignTo(AFI->getVarArgsGPRSize(), 16) : 0; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes - PrologueSaveSize); - bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { - MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, - -PrologueSaveSize); + MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( + MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI); NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock::iterator End = MBB.end(); while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) { if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); + fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), + NeedsWinCFI); ++MBBI; } + + // The code below is not applicable to funclets. We have emitted all the SEH + // opcodes that we needed to emit. The FP and BP belong to the containing + // function. + if (IsFunclet) { + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + return; + } + if (HasFP) { // Only set up FP if we actually need to. Frame pointer is fp = // sp - fixedobject - 16. @@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { uint32_t NumWords = NumBytes >> 4; + if (NeedsWinCFI) { + // alloc_l can hold at most 256MB, so assume that NumBytes doesn't + // exceed this amount. We need to move at most 2^24 - 1 into x15. + // This is at most two instructions, MOVZ follwed by MOVK. + // TODO: Fix to use multiple stack alloc unwind codes for stacks + // exceeding 256MB in size. + if (NumBytes >= (1 << 28)) + report_fatal_error("Stack size cannot exceed 256MB for stack " + "unwinding purposes"); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup); + uint32_t LowNumWords = NumWords & 0xFFFF; + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) + .addImm(LowNumWords) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); + if ((NumWords & 0xFFFF0000) != 0) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) + .addReg(AArch64::X15) + .addImm((NumWords & 0xFFFF0000) >> 16) // High half + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); + } + } else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + } switch (MF.getTarget().getCodeModel()) { + case CodeModel::Tiny: case CodeModel::Small: case CodeModel::Medium: case CodeModel::Kernel: BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) .addExternalSymbol("__chkstk") .addReg(AArch64::X15, RegState::Implicit) + .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) .setMIFlags(MachineInstr::FrameSetup); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); break; case CodeModel::Large: BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) @@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .addExternalSymbol("__chkstk") .addExternalSymbol("__chkstk") .setMIFlags(MachineInstr::FrameSetup); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR)) .addReg(AArch64::X16, RegState::Kill) .addReg(AArch64::X15, RegState::Implicit | RegState::Define) + .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) .setMIFlags(MachineInstr::FrameSetup); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); break; } @@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .addReg(AArch64::X15, RegState::Kill) .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) .setMIFlags(MachineInstr::FrameSetup); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); NumBytes = 0; } @@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); if (NeedsRealignment) { const unsigned Alignment = MFI.getMaxAlignment(); @@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); AFI->setStackRealigned(true); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(NumBytes & andMaskEncoded) + .setMIFlag(MachineInstr::FrameSetup); } } @@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); } + // The very last FrameSetup instruction indicates the end of prologue. Emit a + // SEH opcode indicating the prologue end. + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = -TD.getPointerSize(0); @@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } } +static void InsertReturnAddressAuth(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (!ShouldSignReturnAddress(MF)) + return; + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + // The AUTIASP instruction assembles to a hint instruction before v8.3a so + // this instruction can safely used for any v8a architecture. + // From v8.3a onwards there are optimised authenticate LR and return + // instructions, namely RETA{A,B}, that can be used instead. + if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() && + MBBI->getOpcode() == AArch64::RET_ReallyLR) { + BuildMI(MBB, MBBI, DL, + TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB)) + .copyImplicitOps(*MBBI); + MBB.erase(MBBI); + } else { + BuildMI( + MBB, MBBI, DL, + TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP)) + .setMIFlag(MachineInstr::FrameDestroy); + } +} + +static bool isFuncletReturnInstr(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::CATCHRET: + case AArch64::CLEANUPRET: + return true; + } +} + void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; + bool NeedsWinCFI = needsWinCFI(MF); + bool IsFunclet = false; + if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || - RetOpcode == AArch64::TCRETURNri; + RetOpcode == AArch64::TCRETURNri || + RetOpcode == AArch64::TCRETURNriBTI; + IsFunclet = isFuncletReturnInstr(*MBBI); } - int NumBytes = MFI.getStackSize(); - const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : MFI.getStackSize(); + AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. + auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); }); + bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + // Var args are accounted for in the containing function, so don't + // include them for funclets. + unsigned FixedObject = + (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; + // We cannot rely on the local stack size set in emitPrologue if the function + // has funclets, as funclets have different local stack size requirements, and + // the current value set in emitPrologue may be that of the containing + // function. + if (MF.hasEHFunclets()) + AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); // Assume we can't combine the last pop with the sp restore. if (!CombineSPBump && PrologueSaveSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); + while (AArch64InstrInfo::isSEHInstruction(*Pop)) + Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); // If the offset is 0, convert it to a post-index ldp. - if (OffsetOp.getImm() == 0) { - convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, - PrologueSaveSize); - } else { + if (OffsetOp.getImm() == 0) + convertCalleeSaveRestoreToSPPrePostIncDec( + MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false); + else { // If not, make sure to emit an add after the last ldp. // We're doing this by transfering the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR @@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, ++LastPopI; break; } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); + fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI); } + if (NeedsWinCFI) + BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - NumBytes + AfterCSRPopSize, TII, - MachineInstr::FrameDestroy); + NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI); + if (NeedsWinCFI) + BuildMI(MBB, MBB.getFirstTerminator(), DL, + TII->get(AArch64::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); return; } @@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy); - if (Done) + StackRestoreBytes, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); + if (Done) { + if (NeedsWinCFI) + BuildMI(MBB, MBB.getFirstTerminator(), DL, + TII->get(AArch64::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); return; + } NumBytes = 0; } @@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. - if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -AFI->getCalleeSavedStackSize() + 16, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - AfterCSRPopSize, TII, MachineInstr::FrameDestroy); + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); } + if (NeedsWinCFI) + BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. UseFP = true; + } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) { + // Funclets access the locals contained in the parent's stack frame + // via the frame pointer, so we have to use the FP in the parent + // function. + assert( + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) && + "Funclets should only be present on Win64"); + UseFP = true; } else { // We have the choice between FP and (SP or BP). if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. @@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { Attrs.hasAttrSomewhere(Attribute::SwiftError)); } +static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI) { + // If we are generating register pairs for a Windows function that requires + // EH support, then pair consecutive registers only. There are no unwind + // opcodes for saves/restores of non-consectuve register pairs. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x. + // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling + + // TODO: LR can be paired with any register. We don't support this yet in + // the MCLayer. We need to add support for the save_lrpair unwind code. + if (!NeedsWinCFI) + return false; + if (Reg2 == Reg1 + 1) + return false; + return true; +} + namespace { struct RegPairInfo { @@ -1143,7 +1597,7 @@ struct RegPairInfo { unsigned Reg2 = AArch64::NoRegister; int FrameIdx; int Offset; - bool IsGPR; + enum RegType { GPR, FPR64, FPR128 } Type; RegPairInfo() = default; @@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs( if (CSI.empty()) return; + bool NeedsWinCFI = needsWinCFI(MF); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); @@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs( (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int Offset = AFI->getCalleeSavedStackSize(); - + // On Linux, we will have either one or zero non-paired register. On Windows + // with CFI, we can have multiple unpaired registers in order to utilize the + // available unwind codes. This flag assures that the alignment fixup is done + // only once, as intened. + bool FixupDone = false; for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); - assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || - AArch64::FPR64RegClass.contains(RPI.Reg1)); - RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); + if (AArch64::GPR64RegClass.contains(RPI.Reg1)) + RPI.Type = RegPairInfo::GPR; + else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) + RPI.Type = RegPairInfo::FPR64; + else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) + RPI.Type = RegPairInfo::FPR128; + else + llvm_unreachable("Unsupported register class."); // Add the next reg to the pair if it is in the same register class. if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); - if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || - (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) - RPI.Reg2 = NextReg; + switch (RPI.Type) { + case RegPairInfo::GPR: + if (AArch64::GPR64RegClass.contains(NextReg) && + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + RPI.Reg2 = NextReg; + break; + case RegPairInfo::FPR64: + if (AArch64::FPR64RegClass.contains(NextReg) && + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + RPI.Reg2 = NextReg; + break; + case RegPairInfo::FPR128: + if (AArch64::FPR128RegClass.contains(NextReg)) + RPI.Reg2 = NextReg; + break; + } } // If either of the registers to be saved is the lr register, it means that // we also need to save lr in the shadow call stack. if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) && MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { - if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) + if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18)) report_fatal_error("Must reserve x18 to use shadow call stack"); NeedShadowCallStackProlog = true; } @@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs( RPI.FrameIdx = CSI[i].getFrameIdx(); - if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { - // Round up size of non-pair to pair size if we need to pad the - // callee-save area to ensure 16-byte alignment. - Offset -= 16; + int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8; + Offset -= RPI.isPaired() ? 2 * Scale : Scale; + + // Round up size of non-pair to pair size if we need to pad the + // callee-save area to ensure 16-byte alignment. + if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone && + RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { + FixupDone = true; + Offset -= 8; + assert(Offset % 16 == 0); assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16); MFI.setObjectAlignment(RPI.FrameIdx, 16); - AFI->setCalleeSaveStackHasFreeSpace(true); - } else - Offset -= RPI.isPaired() ? 16 : 8; - assert(Offset % 8 == 0); - RPI.Offset = Offset / 8; + } + + assert(Offset % Scale == 0); + RPI.Offset = Offset / Scale; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); @@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool NeedsWinCFI = needsWinCFI(MF); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; @@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( .addImm(8) .setMIFlag(MachineInstr::FrameSetup); + if (NeedsWinCFI) + BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); + + if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) { + // Emit a CFI instruction that causes 8 to be subtracted from the value of + // x18 when unwinding past this frame. + static const char CFIInst[] = { + dwarf::DW_CFA_val_expression, + 18, // register + 2, // length + static_cast<char>(unsigned(dwarf::DW_OP_breg18)), + static_cast<char>(-8) & 0x7f, // addend (sleb128) + }; + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst)); + BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + // This instruction also makes x18 live-in to the entry block. MBB.addLiveIn(AArch64::X18); } @@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. - if (RPI.IsGPR) - StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; - else - StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; + unsigned Size, Align; + switch (RPI.Type) { + case RegPairInfo::GPR: + StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; + Size = 8; + Align = 8; + break; + case RegPairInfo::FPR64: + StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; + Size = 8; + Align = 8; + break; + case RegPairInfo::FPR128: + StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui; + Size = 16; + Align = 16; + break; + } LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); + assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && + "Windows unwdinding requires a consecutive (FP,LR) pair"); + // Windows unwind codes require consecutive registers if registers are + // paired. Make the switch here, so that the code below will save (x,x+1) + // and not (x+1,x). + unsigned FrameIdxReg1 = RPI.FrameIdx; + unsigned FrameIdxReg2 = RPI.FrameIdx + 1; + if (NeedsWinCFI && RPI.isPaired()) { + std::swap(Reg1, Reg2); + std::swap(FrameIdxReg1, FrameIdxReg2); + } MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (!MRI.isReserved(Reg1)) MBB.addLiveIn(Reg1); @@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), - MachineMemOperand::MOStore, 8, 8)); + MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), + MachineMemOperand::MOStore, Size, Align)); } MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit + .addImm(RPI.Offset) // [sp, #offset*scale], + // where factor*scale is implicit .setMIFlag(MachineInstr::FrameSetup); MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOStore, 8, 8)); + MachinePointerInfo::getFixedStack(MF,FrameIdxReg1), + MachineMemOperand::MOStore, Size, Align)); + if (NeedsWinCFI) + InsertSEH(MIB, TII, MachineInstr::FrameSetup); + } return true; } @@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; + bool NeedsWinCFI = needsWinCFI(MF); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( // ldp x22, x21, [sp, #0] // addImm(+0) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; - if (RPI.IsGPR) - LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; - else - LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; + unsigned Size, Align; + switch (RPI.Type) { + case RegPairInfo::GPR: + LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; + Size = 8; + Align = 8; + break; + case RegPairInfo::FPR64: + LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; + Size = 8; + Align = 8; + break; + case RegPairInfo::FPR128: + LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui; + Size = 16; + Align = 16; + break; + } LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); + // Windows unwind codes require consecutive registers if registers are + // paired. Make the switch here, so that the code below will save (x,x+1) + // and not (x+1,x). + unsigned FrameIdxReg1 = RPI.FrameIdx; + unsigned FrameIdxReg2 = RPI.FrameIdx + 1; + if (NeedsWinCFI && RPI.isPaired()) { + std::swap(Reg1, Reg2); + std::swap(FrameIdxReg1, FrameIdxReg2); + } MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (RPI.isPaired()) { MIB.addReg(Reg2, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), - MachineMemOperand::MOLoad, 8, 8)); + MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), + MachineMemOperand::MOLoad, Size, Align)); } MIB.addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit + .addImm(RPI.Offset) // [sp, #offset*scale] + // where factor*scale is implicit .setMIFlag(MachineInstr::FrameDestroy); MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOLoad, 8, 8)); + MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), + MachineMemOperand::MOLoad, Size, Align)); + if (NeedsWinCFI) + InsertSEH(MIB, TII, MachineInstr::FrameDestroy); }; - if (ReverseCSRRestoreSeq) for (const RegPairInfo &RPI : reverse(RegPairs)) EmitMI(RPI); @@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned UnspilledCSGPRPaired = AArch64::NoRegister; MachineFrameInfo &MFI = MF.getFrameInfo(); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); unsigned BasePointerReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : (unsigned)AArch64::NoRegister; - unsigned SpillEstimate = SavedRegs.count(); - for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - unsigned PairedReg = CSRegs[i ^ 1]; - if (Reg == BasePointerReg) - SpillEstimate++; - if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) - SpillEstimate++; - } - SpillEstimate += 2; // Conservatively include FP+LR in the estimate - unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate; - - // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) { - SavedRegs.set(AArch64::FP); - SavedRegs.set(AArch64::LR); - } - unsigned ExtraCSSpill = 0; // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { @@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // MachO's compact unwind format relies on all registers being stored in // pairs. // FIXME: the usual format is actually better if unwinding isn't needed. - if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) { + if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister && + !SavedRegs.test(PairedReg)) { SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && !RegInfo->isReservedReg(MF, PairedReg)) @@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, } } + // Calculates the callee saved stack size. + unsigned CSStackSize = 0; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned Reg : SavedRegs.set_bits()) + CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8; + + // Save number of saved regs, so we can easily update CSStackSize later. + unsigned NumSavedRegs = SavedRegs.count(); + + // The frame record needs to be created by saving the appropriate registers + unsigned EstimatedStackSize = MFI.estimateStackSize(MF); + if (hasFP(MF) || + windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { + SavedRegs.set(AArch64::FP); + SavedRegs.set(AArch64::LR); + } + LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; for (unsigned Reg : SavedRegs.set_bits()) dbgs() @@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, dbgs() << "\n";); // If any callee-saved registers are used, the frame cannot be eliminated. - unsigned NumRegsSpilled = SavedRegs.count(); - bool CanEliminateFrame = NumRegsSpilled == 0; + bool CanEliminateFrame = SavedRegs.count() == 0; // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. - unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; - LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); - bool BigStack = (CFSize > EstimatedStackSizeLimit); + bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); @@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); ExtraCSSpill = UnspilledCSGPRPaired; - NumRegsSpilled = SavedRegs.count(); } // If we didn't find an extra callee-saved register to spill, create @@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, } } + // Adding the size of additional 64bit GPR saves. + CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); + unsigned AlignedCSStackSize = alignTo(CSStackSize, 16); + LLVM_DEBUG(dbgs() << "Estimated stack frame size: " + << EstimatedStackSize + AlignedCSStackSize + << " bytes.\n"); + // Round up to register pair alignment to avoid additional SP adjustment // instructions. - AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); + AFI->setCalleeSavedStackSize(AlignedCSStackSize); + AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); } bool AArch64FrameLowering::enableStackSlotScavenging( @@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging( const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); return AFI->hasCalleeSaveStackFreeSpace(); } + +void AArch64FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // If this function isn't doing Win64-style C++ EH, we don't need to do + // anything. + if (!MF.hasEHFunclets()) + return; + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); + + MachineBasicBlock &MBB = MF.front(); + auto MBBI = MBB.begin(); + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + if (MBBI->isTerminator()) + return; + + // Create an UnwindHelp object. + int UnwindHelpFI = + MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + // We need to store -2 into the UnwindHelp object at the start of the + // function. + DebugLoc DL; + RS->enterBasicBlock(MBB); + unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) + .addReg(DstReg, getKillRegState(true)) + .addFrameIndex(UnwindHelpFI) + .addImm(0); +} + +/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before +/// the update. This is easily retrieved as it is exactly the offset that is set +/// in processFunctionBeforeFrameFinalized. +int AArch64FrameLowering::getFrameIndexReferencePreferSP( + const MachineFunction &MF, int FI, unsigned &FrameReg, + bool IgnoreSPUpdates) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); +} + +/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve +/// the parent's frame pointer +unsigned AArch64FrameLowering::getWinEHParentFrameOffset( + const MachineFunction &MF) const { + return 0; +} + +/// Funclets only need to account for space for the callee saved registers, +/// as the locals are accounted for in the parent's stack frame. +unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( + const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize(); + // This is the amount of stack a funclet needs to allocate. + return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), + getStackAlignment()); +} |
