diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64FrameLowering.cpp')
| -rw-r--r-- | lib/Target/AArch64/AArch64FrameLowering.cpp | 904 | 
1 files changed, 767 insertions, 137 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 6dc5d19862a94..538a8d7e8fbcf 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -98,6 +98,7 @@  #include "AArch64Subtarget.h"  #include "AArch64TargetMachine.h"  #include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/ScopeExit.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/CodeGen/LivePhysRegs.h" @@ -114,11 +115,13 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/CallingConv.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h"  #include "llvm/MC/MCDwarf.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" @@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {  bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {    const MachineFrameInfo &MFI = MF.getFrameInfo();    const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); +  // Win64 EH requires a frame pointer if funclets are present, as the locals +  // are accessed off the frame pointer in both the parent function and the +  // funclets. +  if (MF.hasEHFunclets()) +    return true;    // Retain behavior of always omitting the FP for leaf functions when possible.    
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))      return true; @@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(    return MBB.erase(I);  } +static bool ShouldSignReturnAddress(MachineFunction &MF) { +  // The function should be signed in the following situations: +  // - sign-return-address=all +  // - sign-return-address=non-leaf and the functions spills the LR + +  const Function &F = MF.getFunction(); +  if (!F.hasFnAttribute("sign-return-address")) +    return false; + +  StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); +  if (Scope.equals("none")) +    return false; + +  if (Scope.equals("all")) +    return true; + +  assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf"); + +  for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo()) +    if (Info.getReg() == AArch64::LR) +      return true; + +  return false; +} +  void AArch64FrameLowering::emitCalleeSavedFrameMoves(      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {    MachineFunction &MF = *MBB.getParent(); @@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {    LiveRegs.addLiveIns(*MBB);    // Mark callee saved registers as used so we will not choose them. -  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); +  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();    for (unsigned i = 0; CSRegs[i]; ++i)      LiveRegs.addReg(CSRegs[i]); @@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(    return true;  } +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. 
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, +                                             const TargetInstrInfo &TII, +                                             MachineInstr::MIFlag Flag) { +  unsigned Opc = MBBI->getOpcode(); +  MachineBasicBlock *MBB = MBBI->getParent(); +  MachineFunction &MF = *MBB->getParent(); +  DebugLoc DL = MBBI->getDebugLoc(); +  unsigned ImmIdx = MBBI->getNumOperands() - 1; +  int Imm = MBBI->getOperand(ImmIdx).getImm(); +  MachineInstrBuilder MIB; +  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); +  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + +  switch (Opc) { +  default: +    llvm_unreachable("No SEH Opcode for this instruction"); +  case AArch64::LDPDpost: +    Imm = -Imm; +    LLVM_FALLTHROUGH; +  case AArch64::STPDpre: { +    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); +    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); +    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) +              .addImm(Reg0) +              .addImm(Reg1) +              .addImm(Imm * 8) +              .setMIFlag(Flag); +    break; +  } +  case AArch64::LDPXpost: +    Imm = -Imm; +    LLVM_FALLTHROUGH; +  case AArch64::STPXpre: { +    unsigned Reg0 = MBBI->getOperand(1).getReg(); +    unsigned Reg1 = MBBI->getOperand(2).getReg(); +    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) +      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) +                .addImm(Imm * 8) +                .setMIFlag(Flag); +    else +      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) +                .addImm(RegInfo->getSEHRegNum(Reg0)) +                .addImm(RegInfo->getSEHRegNum(Reg1)) +                .addImm(Imm * 8) +                .setMIFlag(Flag); +    break; +  } +  case AArch64::LDRDpost: +    Imm = -Imm; +    LLVM_FALLTHROUGH; +  case AArch64::STRDpre: { +    unsigned Reg = 
RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); +    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) +              .addImm(Reg) +              .addImm(Imm) +              .setMIFlag(Flag); +    break; +  } +  case AArch64::LDRXpost: +    Imm = -Imm; +    LLVM_FALLTHROUGH; +  case AArch64::STRXpre: { +    unsigned Reg =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); +    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) +              .addImm(Reg) +              .addImm(Imm) +              .setMIFlag(Flag); +    break; +  } +  case AArch64::STPDi: +  case AArch64::LDPDi: { +    unsigned Reg0 =  RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); +    unsigned Reg1 =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); +    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) +              .addImm(Reg0) +              .addImm(Reg1) +              .addImm(Imm * 8) +              .setMIFlag(Flag); +    break; +  } +  case AArch64::STPXi: +  case AArch64::LDPXi: { +    unsigned Reg0 = MBBI->getOperand(0).getReg(); +    unsigned Reg1 = MBBI->getOperand(1).getReg(); +    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) +      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) +                .addImm(Imm * 8) +                .setMIFlag(Flag); +    else +      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) +                .addImm(RegInfo->getSEHRegNum(Reg0)) +                .addImm(RegInfo->getSEHRegNum(Reg1)) +                .addImm(Imm * 8) +                .setMIFlag(Flag); +    break; +  } +  case AArch64::STRXui: +  case AArch64::LDRXui: { +    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); +    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) +              .addImm(Reg) +              .addImm(Imm * 8) +              .setMIFlag(Flag); +    break; +  } +  case AArch64::STRDui: +  case AArch64::LDRDui: { +    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); +    MIB = BuildMI(MF, DL, 
TII.get(AArch64::SEH_SaveFReg)) +              .addImm(Reg) +              .addImm(Imm * 8) +              .setMIFlag(Flag); +    break; +  } +  } +  auto I = MBB->insertAfter(MBBI, MIB); +  return I; +} + +// Fix up the SEH opcode associated with the save/restore instruction. +static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, +                           unsigned LocalStackSize) { +  MachineOperand *ImmOpnd = nullptr; +  unsigned ImmIdx = MBBI->getNumOperands() - 1; +  switch (MBBI->getOpcode()) { +  default: +    llvm_unreachable("Fix the offset in the SEH instruction"); +  case AArch64::SEH_SaveFPLR: +  case AArch64::SEH_SaveRegP: +  case AArch64::SEH_SaveReg: +  case AArch64::SEH_SaveFRegP: +  case AArch64::SEH_SaveFReg: +    ImmOpnd = &MBBI->getOperand(ImmIdx); +    break; +  } +  if (ImmOpnd) +    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); +} +  // Convert callee-save register save/restore instruction to do stack pointer  // decrement/increment to allocate/deallocate the callee-save stack area by  // converting store/load to use pre/post increment version.  static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, -    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { +    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, +    bool NeedsWinCFI, bool InProlog = true) {    // Ignore instructions that do not operate on SP, i.e. shadow call stack -  // instructions. +  // instructions and associated CFI instruction.    
while (MBBI->getOpcode() == AArch64::STRXpost || -         MBBI->getOpcode() == AArch64::LDRXpre) { -    assert(MBBI->getOperand(0).getReg() != AArch64::SP); +         MBBI->getOpcode() == AArch64::LDRXpre || +         MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) { +    if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION) +      assert(MBBI->getOperand(0).getReg() != AArch64::SP);      ++MBBI;    } -    unsigned NewOpc; -  bool NewIsUnscaled = false; +  int Scale = 1;    switch (MBBI->getOpcode()) {    default:      llvm_unreachable("Unexpected callee-save save/restore opcode!");    case AArch64::STPXi:      NewOpc = AArch64::STPXpre; +    Scale = 8;      break;    case AArch64::STPDi:      NewOpc = AArch64::STPDpre; +    Scale = 8; +    break; +  case AArch64::STPQi: +    NewOpc = AArch64::STPQpre; +    Scale = 16;      break;    case AArch64::STRXui:      NewOpc = AArch64::STRXpre; -    NewIsUnscaled = true;      break;    case AArch64::STRDui:      NewOpc = AArch64::STRDpre; -    NewIsUnscaled = true; +    break; +  case AArch64::STRQui: +    NewOpc = AArch64::STRQpre;      break;    case AArch64::LDPXi:      NewOpc = AArch64::LDPXpost; +    Scale = 8;      break;    case AArch64::LDPDi:      NewOpc = AArch64::LDPDpost; +    Scale = 8; +    break; +  case AArch64::LDPQi: +    NewOpc = AArch64::LDPQpost; +    Scale = 16;      break;    case AArch64::LDRXui:      NewOpc = AArch64::LDRXpost; -    NewIsUnscaled = true;      break;    case AArch64::LDRDui:      NewOpc = AArch64::LDRDpost; -    NewIsUnscaled = true; +    break; +  case AArch64::LDRQui: +    NewOpc = AArch64::LDRQpost;      break;    } +  // Get rid of the SEH code associated with the old instruction. 
+  if (NeedsWinCFI) { +    auto SEH = std::next(MBBI); +    if (AArch64InstrInfo::isSEHInstruction(*SEH)) +      SEH->eraseFromParent(); +  }    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));    MIB.addReg(AArch64::SP, RegState::Define); @@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(           "instruction!");    assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&           "Unexpected base register in callee-save save/restore instruction!"); -  // Last operand is immediate offset that needs fixing. -  assert(CSStackSizeInc % 8 == 0); -  int64_t CSStackSizeIncImm = CSStackSizeInc; -  if (!NewIsUnscaled) -    CSStackSizeIncImm /= 8; -  MIB.addImm(CSStackSizeIncImm); +  assert(CSStackSizeInc % Scale == 0); +  MIB.addImm(CSStackSizeInc / Scale);    MIB.setMIFlags(MBBI->getFlags()); -  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end()); +  MIB.setMemRefs(MBBI->memoperands()); + +  // Generate a new SEH code that corresponds to the new instruction. +  if (NeedsWinCFI) +    InsertSEH(*MIB, *TII, +              InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);    return std::prev(MBB.erase(MBBI));  } @@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(  // Fixup callee-save register save/restore instructions to take into account  // combined SP bump by adding the local stack size to the stack offsets.  static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, -                                              unsigned LocalStackSize) { +                                              unsigned LocalStackSize, +                                              bool NeedsWinCFI) { +  if (AArch64InstrInfo::isSEHInstruction(MI)) +    return; +    unsigned Opc = MI.getOpcode();    // Ignore instructions that do not operate on SP, i.e. shadow call stack -  // instructions. 
-  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) { -    assert(MI.getOperand(0).getReg() != AArch64::SP); +  // instructions and associated CFI instruction. +  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre || +      Opc == AArch64::CFI_INSTRUCTION) { +    if (Opc != AArch64::CFI_INSTRUCTION) +      assert(MI.getOperand(0).getReg() != AArch64::SP);      return;    } -  (void)Opc; -  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi || -          Opc == AArch64::STRXui || Opc == AArch64::STRDui || -          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi || -          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) && -         "Unexpected callee-save save/restore opcode!"); +  unsigned Scale; +  switch (Opc) { +  case AArch64::STPXi: +  case AArch64::STRXui: +  case AArch64::STPDi: +  case AArch64::STRDui: +  case AArch64::LDPXi: +  case AArch64::LDRXui: +  case AArch64::LDPDi: +  case AArch64::LDRDui: +    Scale = 8; +    break; +  case AArch64::STPQi: +  case AArch64::STRQui: +  case AArch64::LDPQi: +  case AArch64::LDRQui: +    Scale = 16; +    break; +  default: +    llvm_unreachable("Unexpected callee-save save/restore opcode!"); +  }    unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;    assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && @@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,    // Last operand is immediate offset that needs fixing.    MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);    // All generated opcodes have scaled offsets. 
-  assert(LocalStackSize % 8 == 0); -  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); +  assert(LocalStackSize % Scale == 0); +  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale); + +  if (NeedsWinCFI) { +    auto MBBI = std::next(MachineBasicBlock::iterator(MI)); +    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); +    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && +           "Expecting a SEH instruction"); +    fixupSEHOpcode(MBBI, LocalStackSize); +  }  }  static void adaptForLdStOpt(MachineBasicBlock &MBB, @@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,    //  } +static bool ShouldSignWithAKey(MachineFunction &MF) { +  const Function &F = MF.getFunction(); +  if (!F.hasFnAttribute("sign-return-address-key")) +    return true; + +  const StringRef Key = +      F.getFnAttribute("sign-return-address-key").getValueAsString(); +  assert(Key.equals_lower("a_key") || Key.equals_lower("b_key")); +  return Key.equals_lower("a_key"); +} + +static bool needsWinCFI(const MachineFunction &MF) { +  const Function &F = MF.getFunction(); +  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && +         F.needsUnwindTableEntry(); +} +  void AArch64FrameLowering::emitPrologue(MachineFunction &MF,                                          MachineBasicBlock &MBB) const {    MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,    const TargetInstrInfo *TII = Subtarget.getInstrInfo();    MachineModuleInfo &MMI = MF.getMMI();    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); -  bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); +  bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) && +                         !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();    bool HasFP = hasFP(MF); +  bool NeedsWinCFI = needsWinCFI(MF); +  MF.setHasWinCFI(NeedsWinCFI); +  bool 
IsFunclet = MBB.isEHFuncletEntry();    // At this point, we're going to decide whether or not the function uses a    // redzone. In most cases, the function doesn't have a redzone so let's @@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,    // to determine the end of the prologue.    DebugLoc DL; +  if (ShouldSignReturnAddress(MF)) { +    if (ShouldSignWithAKey(MF)) +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) +          .setMIFlag(MachineInstr::FrameSetup); +    else { +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) +          .setMIFlag(MachineInstr::FrameSetup); +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP)) +          .setMIFlag(MachineInstr::FrameSetup); +    } + +    unsigned CFIIndex = +        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); +    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +        .addCFIIndex(CFIIndex) +        .setMIFlags(MachineInstr::FrameSetup); +  } +    // All calls are tail calls in GHC calling conv, and functions have no    // prologue/epilogue.    if (MF.getFunction().getCallingConv() == CallingConv::GHC)      return; -  int NumBytes = (int)MFI.getStackSize(); +  // getStackSize() includes all the locals in its size calculation. We don't +  // include these locals when computing the stack size of a funclet, as they +  // are allocated in the parent's stack frame and accessed via the frame +  // pointer from the funclet.  We only save the callee saved registers in the +  // funclet, which are really the callee saved registers of the parent +  // function, including the funclet. +  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) +                           : (int)MFI.getStackSize();    if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {      assert(!HasFP && "unexpected function without stack frame but with FP"); -      // All of the stack allocation is for locals.      
AFI->setLocalStackSize(NumBytes); -      if (!NumBytes)        return;      // REDZONE: If the stack size is less than 128 bytes, we don't need @@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,        ++NumRedZoneFunctions;      } else {        emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, -                      MachineInstr::FrameSetup); - -      // Label used to tie together the PROLOG_LABEL and the MachineMoves. -      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); -      // Encode the stack size of the leaf function. -      unsigned CFIIndex = MF.addFrameInst( -          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); -      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) -          .addCFIIndex(CFIIndex) -          .setMIFlags(MachineInstr::FrameSetup); +                      MachineInstr::FrameSetup, false, NeedsWinCFI); +      if (!NeedsWinCFI) { +        // Label used to tie together the PROLOG_LABEL and the MachineMoves. +        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); +        // Encode the stack size of the leaf function. +        unsigned CFIIndex = MF.addFrameInst( +            MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); +        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +            .addCFIIndex(CFIIndex) +            .setMIFlags(MachineInstr::FrameSetup); +      }      } + +    if (NeedsWinCFI) +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) +          .setMIFlag(MachineInstr::FrameSetup); +      return;    }    bool IsWin64 =        Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); -  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; +  // Var args are accounted for in the containing function, so don't +  // include them for funclets. +  unsigned FixedObject = (IsWin64 && !IsFunclet) ? 
+                         alignTo(AFI->getVarArgsGPRSize(), 16) : 0;    auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;    // All of the remaining stack allocations are for locals.    AFI->setLocalStackSize(NumBytes - PrologueSaveSize); -    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);    if (CombineSPBump) {      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, -                    MachineInstr::FrameSetup); +                    MachineInstr::FrameSetup, false, NeedsWinCFI);      NumBytes = 0;    } else if (PrologueSaveSize != 0) { -    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, -                                                     -PrologueSaveSize); +    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( +        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);      NumBytes -= PrologueSaveSize;    }    assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,    MachineBasicBlock::iterator End = MBB.end();    while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {      if (CombineSPBump) -      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); +      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), +                                        NeedsWinCFI);      ++MBBI;    } + +  // The code below is not applicable to funclets. We have emitted all the SEH +  // opcodes that we needed to emit.  The FP and BP belong to the containing +  // function. +  if (IsFunclet) { +    if (NeedsWinCFI) +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) +          .setMIFlag(MachineInstr::FrameSetup); +    return; +  } +    if (HasFP) {      // Only set up FP if we actually need to. Frame pointer is fp =      // sp - fixedobject - 16. 
@@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,      // Note: All stores of callee-saved registers are marked as "FrameSetup".      // This code marks the instruction(s) that set the FP also.      emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, -                    MachineInstr::FrameSetup); +                    MachineInstr::FrameSetup, false, NeedsWinCFI);    }    if (windowsRequiresStackProbe(MF, NumBytes)) {      uint32_t NumWords = NumBytes >> 4; - -    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) -        .addImm(NumWords) -        .setMIFlags(MachineInstr::FrameSetup); +    if (NeedsWinCFI) { +      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't +      // exceed this amount.  We need to move at most 2^24 - 1 into x15. +      // This is at most two instructions, MOVZ followed by MOVK. +      // TODO: Fix to use multiple stack alloc unwind codes for stacks +      // exceeding 256MB in size. 
+      if (NumBytes >= (1 << 28)) +        report_fatal_error("Stack size cannot exceed 256MB for stack " +                            "unwinding purposes"); + +      uint32_t LowNumWords = NumWords & 0xFFFF; +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) +            .addImm(LowNumWords) +            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) +            .setMIFlag(MachineInstr::FrameSetup); +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +            .setMIFlag(MachineInstr::FrameSetup); +      if ((NumWords & 0xFFFF0000) != 0) { +          BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) +              .addReg(AArch64::X15) +              .addImm((NumWords & 0xFFFF0000) >> 16) // High half +              .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) +              .setMIFlag(MachineInstr::FrameSetup); +          BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +            .setMIFlag(MachineInstr::FrameSetup); +      } +    } else { +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) +          .addImm(NumWords) +          .setMIFlags(MachineInstr::FrameSetup); +    }      switch (MF.getTarget().getCodeModel()) { +    case CodeModel::Tiny:      case CodeModel::Small:      case CodeModel::Medium:      case CodeModel::Kernel:        BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))            .addExternalSymbol("__chkstk")            .addReg(AArch64::X15, RegState::Implicit) +          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) +          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) +          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)            .setMIFlags(MachineInstr::FrameSetup); +      if (NeedsWinCFI) +        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +            .setMIFlag(MachineInstr::FrameSetup);        break;      case CodeModel::Large:        
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) @@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,            .addExternalSymbol("__chkstk")            .addExternalSymbol("__chkstk")            .setMIFlags(MachineInstr::FrameSetup); +      if (NeedsWinCFI) +        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +            .setMIFlag(MachineInstr::FrameSetup);        BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))            .addReg(AArch64::X16, RegState::Kill)            .addReg(AArch64::X15, RegState::Implicit | RegState::Define) +          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) +          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) +          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)            .setMIFlags(MachineInstr::FrameSetup); +      if (NeedsWinCFI) +        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +            .setMIFlag(MachineInstr::FrameSetup);        break;      } @@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,          .addReg(AArch64::X15, RegState::Kill)          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))          .setMIFlags(MachineInstr::FrameSetup); +    if (NeedsWinCFI) +       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) +            .addImm(NumBytes) +            .setMIFlag(MachineInstr::FrameSetup);      NumBytes = 0;    } @@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,        // the correct value here, as NumBytes also includes padding bytes,        // which shouldn't be counted here.        
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, -                      MachineInstr::FrameSetup); +                      MachineInstr::FrameSetup, false, NeedsWinCFI);      if (NeedsRealignment) {        const unsigned Alignment = MFI.getMaxAlignment(); @@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,            .addReg(scratchSPReg, RegState::Kill)            .addImm(andMaskEncoded);        AFI->setStackRealigned(true); +      if (NeedsWinCFI) +        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) +            .addImm(NumBytes & andMaskEncoded) +            .setMIFlag(MachineInstr::FrameSetup);      }    } @@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,    if (RegInfo->hasBasePointer(MF)) {      TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,                       false); +    if (NeedsWinCFI) +      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) +          .setMIFlag(MachineInstr::FrameSetup);    } +  // The very last FrameSetup instruction indicates the end of prologue. Emit a +  // SEH opcode indicating the prologue end. 
+  if (NeedsWinCFI) +    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) +        .setMIFlag(MachineInstr::FrameSetup); +    if (needsFrameMoves) {      const DataLayout &TD = MF.getDataLayout();      const int StackGrowth = -TD.getPointerSize(0); @@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,    }  } +static void InsertReturnAddressAuth(MachineFunction &MF, +                                    MachineBasicBlock &MBB) { +  if (!ShouldSignReturnAddress(MF)) +    return; +  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); +  const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + +  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); +  DebugLoc DL; +  if (MBBI != MBB.end()) +    DL = MBBI->getDebugLoc(); + +  // The AUTIASP instruction assembles to a hint instruction before v8.3a so +  // this instruction can safely be used for any v8a architecture. +  // From v8.3a onwards there are optimised authenticate LR and return +  // instructions, namely RETA{A,B}, that can be used instead. +  if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() && +      MBBI->getOpcode() == AArch64::RET_ReallyLR) { +    BuildMI(MBB, MBBI, DL, +            TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB)) +        .copyImplicitOps(*MBBI); +    MBB.erase(MBBI); +  } else { +    BuildMI( +        MBB, MBBI, DL, +        TII->get(ShouldSignWithAKey(MF) ? 
AArch64::AUTIASP : AArch64::AUTIBSP)) +        .setMIFlag(MachineInstr::FrameDestroy); +  } +} + +static bool isFuncletReturnInstr(const MachineInstr &MI) { +  switch (MI.getOpcode()) { +  default: +    return false; +  case AArch64::CATCHRET: +  case AArch64::CLEANUPRET: +    return true; +  } +} +  void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,                                          MachineBasicBlock &MBB) const {    MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    const TargetInstrInfo *TII = Subtarget.getInstrInfo();    DebugLoc DL;    bool IsTailCallReturn = false; +  bool NeedsWinCFI = needsWinCFI(MF); +  bool IsFunclet = false; +    if (MBB.end() != MBBI) {      DL = MBBI->getDebugLoc();      unsigned RetOpcode = MBBI->getOpcode();      IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || -      RetOpcode == AArch64::TCRETURNri; +                       RetOpcode == AArch64::TCRETURNri || +                       RetOpcode == AArch64::TCRETURNriBTI; +    IsFunclet = isFuncletReturnInstr(*MBBI);    } -  int NumBytes = MFI.getStackSize(); -  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + +  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) +                           : MFI.getStackSize(); +  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();    // All calls are tail calls in GHC calling conv, and functions have no    // prologue/epilogue. @@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps    // it as the 2nd argument of AArch64ISD::TC_RETURN. +  auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); }); +    bool IsWin64 =        Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); -  unsigned FixedObject = IsWin64 ? 
alignTo(AFI->getVarArgsGPRSize(), 16) : 0; +  // Var args are accounted for in the containing function, so don't +  // include them for funclets. +  unsigned FixedObject = +      (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;    uint64_t AfterCSRPopSize = ArgumentPopSize;    auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; +  // We cannot rely on the local stack size set in emitPrologue if the function +  // has funclets, as funclets have different local stack size requirements, and +  // the current value set in emitPrologue may be that of the containing +  // function. +  if (MF.hasEHFunclets()) +    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);    // Assume we can't combine the last pop with the sp restore.    if (!CombineSPBump && PrologueSaveSize != 0) {      MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); +    while (AArch64InstrInfo::isSEHInstruction(*Pop)) +      Pop = std::prev(Pop);      // Converting the last ldp to a post-index ldp is valid only if the last      // ldp's offset is 0.      const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);      // If the offset is 0, convert it to a post-index ldp. -    if (OffsetOp.getImm() == 0) { -      convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, -                                                PrologueSaveSize); -    } else { +    if (OffsetOp.getImm() == 0) +      convertCalleeSaveRestoreToSPPrePostIncDec( +          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false); +    else {        // If not, make sure to emit an add after the last ldp.        
// We're doing this by transfering the size to be restored from the        // adjustment *before* the CSR pops to the adjustment *after* the CSR @@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,        ++LastPopI;        break;      } else if (CombineSPBump) -      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); +      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), +                                        NeedsWinCFI);    } +  if (NeedsWinCFI) +    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) +        .setMIFlag(MachineInstr::FrameDestroy); +    // If there is a single SP update, insert it before the ret and we're done.    if (CombineSPBump) {      emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, -                    NumBytes + AfterCSRPopSize, TII, -                    MachineInstr::FrameDestroy); +                    NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, +                    false, NeedsWinCFI); +    if (NeedsWinCFI) +      BuildMI(MBB, MBB.getFirstTerminator(), DL, +              TII->get(AArch64::SEH_EpilogEnd)) +          .setMIFlag(MachineInstr::FrameDestroy);      return;    } @@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,        adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, -                    StackRestoreBytes, TII, MachineInstr::FrameDestroy); -    if (Done) +                    StackRestoreBytes, TII, MachineInstr::FrameDestroy, false, +                    NeedsWinCFI); +    if (Done) { +      if (NeedsWinCFI) +        BuildMI(MBB, MBB.getFirstTerminator(), DL, +                TII->get(AArch64::SEH_EpilogEnd)) +            .setMIFlag(MachineInstr::FrameDestroy);        return; +    }      NumBytes = 0;    } @@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    
// FIXME: Rather than doing the math here, we should instead just use    // non-post-indexed loads for the restores if we aren't actually going to    // be able to save any instructions. -  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) +  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,                      -AFI->getCalleeSavedStackSize() + 16, TII, -                    MachineInstr::FrameDestroy); +                    MachineInstr::FrameDestroy, false, NeedsWinCFI);    else if (NumBytes)      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, -                    MachineInstr::FrameDestroy); +                    MachineInstr::FrameDestroy, false, NeedsWinCFI);    // This must be placed after the callee-save restore code because that code    // assumes the SP is at the same location as it was after the callee-save save @@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,      adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);      emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, -                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy); +                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false, +                    NeedsWinCFI);    } +  if (NeedsWinCFI) +    BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) +        .setMIFlag(MachineInstr::FrameDestroy);  }  /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,          // being in range for direct access. If the FPOffset is positive,          // that'll always be best, as the SP will be even further away.          
UseFP = true; +      } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) { +        // Funclets access the locals contained in the parent's stack frame +        // via the frame pointer, so we have to use the FP in the parent +        // function. +        assert( +            Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) && +            "Funclets should only be present on Win64"); +        UseFP = true;        } else {          // We have the choice between FP and (SP or BP).          if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. @@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {             Attrs.hasAttrSomewhere(Attribute::SwiftError));  } +static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, +                                             bool NeedsWinCFI) { +  // If we are generating register pairs for a Windows function that requires +  // EH support, then pair consecutive registers only.  There are no unwind +  // opcodes for saves/restores of non-consectuve register pairs. +  // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x. +  // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling + +  // TODO: LR can be paired with any register.  We don't support this yet in +  // the MCLayer.  We need to add support for the save_lrpair unwind code. 
+  if (!NeedsWinCFI) +    return false; +  if (Reg2 == Reg1 + 1) +    return false; +  return true; +} +  namespace {  struct RegPairInfo { @@ -1143,7 +1597,7 @@ struct RegPairInfo {    unsigned Reg2 = AArch64::NoRegister;    int FrameIdx;    int Offset; -  bool IsGPR; +  enum RegType { GPR, FPR64, FPR128 } Type;    RegPairInfo() = default; @@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs(    if (CSI.empty())      return; +  bool NeedsWinCFI = needsWinCFI(MF);    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();    MachineFrameInfo &MFI = MF.getFrameInfo();    CallingConv::ID CC = MF.getFunction().getCallingConv(); @@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs(            (Count & 1) == 0) &&           "Odd number of callee-saved regs to spill!");    int Offset = AFI->getCalleeSavedStackSize(); - +  // On Linux, we will have either one or zero non-paired register.  On Windows +  // with CFI, we can have multiple unpaired registers in order to utilize the +  // available unwind codes.  This flag assures that the alignment fixup is done +  // only once, as intened. +  bool FixupDone = false;    for (unsigned i = 0; i < Count; ++i) {      RegPairInfo RPI;      RPI.Reg1 = CSI[i].getReg(); -    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) || -           AArch64::FPR64RegClass.contains(RPI.Reg1)); -    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); +    if (AArch64::GPR64RegClass.contains(RPI.Reg1)) +      RPI.Type = RegPairInfo::GPR; +    else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) +      RPI.Type = RegPairInfo::FPR64; +    else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) +      RPI.Type = RegPairInfo::FPR128; +    else +      llvm_unreachable("Unsupported register class.");      // Add the next reg to the pair if it is in the same register class.      
if (i + 1 < Count) {        unsigned NextReg = CSI[i + 1].getReg(); -      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || -          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) -        RPI.Reg2 = NextReg; +      switch (RPI.Type) { +      case RegPairInfo::GPR: +        if (AArch64::GPR64RegClass.contains(NextReg) && +            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) +          RPI.Reg2 = NextReg; +        break; +      case RegPairInfo::FPR64: +        if (AArch64::FPR64RegClass.contains(NextReg) && +            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) +          RPI.Reg2 = NextReg; +        break; +      case RegPairInfo::FPR128: +        if (AArch64::FPR128RegClass.contains(NextReg)) +          RPI.Reg2 = NextReg; +        break; +      }      }      // If either of the registers to be saved is the lr register, it means that      // we also need to save lr in the shadow call stack.      if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&          MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { -      if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) +      if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))          report_fatal_error("Must reserve x18 to use shadow call stack");        NeedShadowCallStackProlog = true;      } @@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs(      RPI.FrameIdx = CSI[i].getFrameIdx(); -    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { -      // Round up size of non-pair to pair size if we need to pad the -      // callee-save area to ensure 16-byte alignment. -      Offset -= 16; +    int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8; +    Offset -= RPI.isPaired() ? 2 * Scale : Scale; + +    // Round up size of non-pair to pair size if we need to pad the +    // callee-save area to ensure 16-byte alignment. 
+    if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone && +        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { +      FixupDone = true; +      Offset -= 8; +      assert(Offset % 16 == 0);        assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);        MFI.setObjectAlignment(RPI.FrameIdx, 16); -      AFI->setCalleeSaveStackHasFreeSpace(true); -    } else -      Offset -= RPI.isPaired() ? 16 : 8; -    assert(Offset % 8 == 0); -    RPI.Offset = Offset / 8; +    } + +    assert(Offset % Scale == 0); +    RPI.Offset = Offset / Scale;      assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&             "Offset out of bounds for LDP/STP immediate"); @@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(      const TargetRegisterInfo *TRI) const {    MachineFunction &MF = *MBB.getParent();    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  bool NeedsWinCFI = needsWinCFI(MF);    DebugLoc DL;    SmallVector<RegPairInfo, 8> RegPairs; @@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(          .addImm(8)          .setMIFlag(MachineInstr::FrameSetup); +    if (NeedsWinCFI) +      BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop)) +          .setMIFlag(MachineInstr::FrameSetup); + +    if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) { +      // Emit a CFI instruction that causes 8 to be subtracted from the value of +      // x18 when unwinding past this frame. 
+      static const char CFIInst[] = { +          dwarf::DW_CFA_val_expression, +          18, // register +          2,  // length +          static_cast<char>(unsigned(dwarf::DW_OP_breg18)), +          static_cast<char>(-8) & 0x7f, // addend (sleb128) +      }; +      unsigned CFIIndex = +          MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst)); +      BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION)) +          .addCFIIndex(CFIIndex) +          .setMIFlag(MachineInstr::FrameSetup); +    } +      // This instruction also makes x18 live-in to the entry block.      MBB.addLiveIn(AArch64::X18);    } @@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(      // Rationale: This sequence saves uop updates compared to a sequence of      // pre-increment spills like stp xi,xj,[sp,#-16]!      // Note: Similar rationale and sequence for restores in epilog. -    if (RPI.IsGPR) -      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; -    else -      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; +    unsigned Size, Align; +    switch (RPI.Type) { +    case RegPairInfo::GPR: +       StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; +       Size = 8; +       Align = 8; +       break; +    case RegPairInfo::FPR64: +       StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; +       Size = 8; +       Align = 8; +       break; +    case RegPairInfo::FPR128: +       StrOpc = RPI.isPaired() ? 
AArch64::STPQi : AArch64::STRQui; +       Size = 16; +       Align = 16; +       break; +    }      LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);                 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);                 dbgs() << ") -> fi#(" << RPI.FrameIdx;                 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;                 dbgs() << ")\n"); +    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && +           "Windows unwdinding requires a consecutive (FP,LR) pair"); +    // Windows unwind codes require consecutive registers if registers are +    // paired.  Make the switch here, so that the code below will save (x,x+1) +    // and not (x+1,x). +    unsigned FrameIdxReg1 = RPI.FrameIdx; +    unsigned FrameIdxReg2 = RPI.FrameIdx + 1; +    if (NeedsWinCFI && RPI.isPaired()) { +      std::swap(Reg1, Reg2); +      std::swap(FrameIdxReg1, FrameIdxReg2); +    }      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));      if (!MRI.isReserved(Reg1))        MBB.addLiveIn(Reg1); @@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(          MBB.addLiveIn(Reg2);        MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));        MIB.addMemOperand(MF.getMachineMemOperand( -          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), -          MachineMemOperand::MOStore, 8, 8)); +          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), +          MachineMemOperand::MOStore, Size, Align));      }      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))          .addReg(AArch64::SP) -        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit +        .addImm(RPI.Offset) // [sp, #offset*scale], +                            // where factor*scale is implicit          .setMIFlag(MachineInstr::FrameSetup);      MIB.addMemOperand(MF.getMachineMemOperand( -        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), -        MachineMemOperand::MOStore, 8, 8)); +  
      MachinePointerInfo::getFixedStack(MF,FrameIdxReg1), +        MachineMemOperand::MOStore, Size, Align)); +    if (NeedsWinCFI) +      InsertSEH(MIB, TII, MachineInstr::FrameSetup); +    }    return true;  } @@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();    DebugLoc DL;    SmallVector<RegPairInfo, 8> RegPairs; +  bool NeedsWinCFI = needsWinCFI(MF);    if (MI != MBB.end())      DL = MI->getDebugLoc(); @@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(      //    ldp     x22, x21, [sp, #0]      // addImm(+0)      // Note: see comment in spillCalleeSavedRegisters()      unsigned LdrOpc; -    if (RPI.IsGPR) -      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; -    else -      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; +    unsigned Size, Align; +    switch (RPI.Type) { +    case RegPairInfo::GPR: +       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; +       Size = 8; +       Align = 8; +       break; +    case RegPairInfo::FPR64: +       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; +       Size = 8; +       Align = 8; +       break; +    case RegPairInfo::FPR128: +       LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui; +       Size = 16; +       Align = 16; +       break; +    }      LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);                 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);                 dbgs() << ") -> fi#(" << RPI.FrameIdx;                 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;                 dbgs() << ")\n"); +    // Windows unwind codes require consecutive registers if registers are +    // paired.  Make the switch here, so that the code below will save (x,x+1) +    // and not (x+1,x). 
+    unsigned FrameIdxReg1 = RPI.FrameIdx; +    unsigned FrameIdxReg2 = RPI.FrameIdx + 1; +    if (NeedsWinCFI && RPI.isPaired()) { +      std::swap(Reg1, Reg2); +      std::swap(FrameIdxReg1, FrameIdxReg2); +    }      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));      if (RPI.isPaired()) {        MIB.addReg(Reg2, getDefRegState(true));        MIB.addMemOperand(MF.getMachineMemOperand( -          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), -          MachineMemOperand::MOLoad, 8, 8)); +          MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), +          MachineMemOperand::MOLoad, Size, Align));      }      MIB.addReg(Reg1, getDefRegState(true))          .addReg(AArch64::SP) -        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit +        .addImm(RPI.Offset) // [sp, #offset*scale] +                            // where factor*scale is implicit          .setMIFlag(MachineInstr::FrameDestroy);      MIB.addMemOperand(MF.getMachineMemOperand( -        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), -        MachineMemOperand::MOLoad, 8, 8)); +        MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), +        MachineMemOperand::MOLoad, Size, Align)); +    if (NeedsWinCFI) +      InsertSEH(MIB, TII, MachineInstr::FrameDestroy);    }; -    if (ReverseCSRRestoreSeq)      for (const RegPairInfo &RPI : reverse(RegPairs))        EmitMI(RPI); @@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,    unsigned UnspilledCSGPRPaired = AArch64::NoRegister;    MachineFrameInfo &MFI = MF.getFrameInfo(); -  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); +  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();    unsigned BasePointerReg = RegInfo->hasBasePointer(MF)                                  ? 
RegInfo->getBaseRegister()                                  : (unsigned)AArch64::NoRegister; -  unsigned SpillEstimate = SavedRegs.count(); -  for (unsigned i = 0; CSRegs[i]; ++i) { -    unsigned Reg = CSRegs[i]; -    unsigned PairedReg = CSRegs[i ^ 1]; -    if (Reg == BasePointerReg) -      SpillEstimate++; -    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) -      SpillEstimate++; -  } -  SpillEstimate += 2; // Conservatively include FP+LR in the estimate -  unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate; - -  // The frame record needs to be created by saving the appropriate registers -  if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) { -    SavedRegs.set(AArch64::FP); -    SavedRegs.set(AArch64::LR); -  } -    unsigned ExtraCSSpill = 0;    // Figure out which callee-saved registers to save/restore.    for (unsigned i = 0; CSRegs[i]; ++i) { @@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,      // MachO's compact unwind format relies on all registers being stored in      // pairs.      // FIXME: the usual format is actually better if unwinding isn't needed. -    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) { +    if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister && +        !SavedRegs.test(PairedReg)) {        SavedRegs.set(PairedReg);        if (AArch64::GPR64RegClass.contains(PairedReg) &&            !RegInfo->isReservedReg(MF, PairedReg)) @@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,      }    } +  // Calculates the callee saved stack size. +  unsigned CSStackSize = 0; +  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); +  const MachineRegisterInfo &MRI = MF.getRegInfo(); +  for (unsigned Reg : SavedRegs.set_bits()) +    CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8; + +  // Save number of saved regs, so we can easily update CSStackSize later. 
+  unsigned NumSavedRegs = SavedRegs.count(); + +  // The frame record needs to be created by saving the appropriate registers +  unsigned EstimatedStackSize = MFI.estimateStackSize(MF); +  if (hasFP(MF) || +      windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { +    SavedRegs.set(AArch64::FP); +    SavedRegs.set(AArch64::LR); +  } +    LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";               for (unsigned Reg                    : SavedRegs.set_bits()) dbgs() @@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,               dbgs() << "\n";);    // If any callee-saved registers are used, the frame cannot be eliminated. -  unsigned NumRegsSpilled = SavedRegs.count(); -  bool CanEliminateFrame = NumRegsSpilled == 0; +  bool CanEliminateFrame = SavedRegs.count() == 0;    // The CSR spill slots have not been allocated yet, so estimateStackSize    // won't include them. -  unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; -  LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");    unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); -  bool BigStack = (CFSize > EstimatedStackSizeLimit); +  bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;    if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))      AFI->setHasStackFrame(true); @@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,        if (produceCompactUnwindFrame(MF))          SavedRegs.set(UnspilledCSGPRPaired);        ExtraCSSpill = UnspilledCSGPRPaired; -      NumRegsSpilled = SavedRegs.count();      }      // If we didn't find an extra callee-saved register to spill, create @@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,      }    } +  // Adding the size of additional 64bit GPR saves. 
+  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); +  unsigned AlignedCSStackSize = alignTo(CSStackSize, 16); +  LLVM_DEBUG(dbgs() << "Estimated stack frame size: " +               << EstimatedStackSize + AlignedCSStackSize +               << " bytes.\n"); +    // Round up to register pair alignment to avoid additional SP adjustment    // instructions. -  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); +  AFI->setCalleeSavedStackSize(AlignedCSStackSize); +  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);  }  bool AArch64FrameLowering::enableStackSlotScavenging( @@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging(    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();    return AFI->hasCalleeSaveStackFreeSpace();  } + +void AArch64FrameLowering::processFunctionBeforeFrameFinalized( +    MachineFunction &MF, RegScavenger *RS) const { +  // If this function isn't doing Win64-style C++ EH, we don't need to do +  // anything. +  if (!MF.hasEHFunclets()) +    return; +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  MachineFrameInfo &MFI = MF.getFrameInfo(); +  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); + +  MachineBasicBlock &MBB = MF.front(); +  auto MBBI = MBB.begin(); +  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) +    ++MBBI; + +  if (MBBI->isTerminator()) +    return; + +  // Create an UnwindHelp object. +  int UnwindHelpFI = +      MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); +  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; +  // We need to store -2 into the UnwindHelp object at the start of the +  // function. 
+  DebugLoc DL; +  RS->enterBasicBlock(MBB); +  unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0); +  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); +  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) +      .addReg(DstReg, getKillRegState(true)) +      .addFrameIndex(UnwindHelpFI) +      .addImm(0); +} + +/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before +/// the update.  This is easily retrieved as it is exactly the offset that is set +/// in processFunctionBeforeFrameFinalized. +int AArch64FrameLowering::getFrameIndexReferencePreferSP( +    const MachineFunction &MF, int FI, unsigned &FrameReg, +    bool IgnoreSPUpdates) const { +  const MachineFrameInfo &MFI = MF.getFrameInfo(); +  LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " +                    << MFI.getObjectOffset(FI) << "\n"); +  FrameReg = AArch64::SP; +  return MFI.getObjectOffset(FI); +} + +/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve +/// the parent's frame pointer +unsigned AArch64FrameLowering::getWinEHParentFrameOffset( +    const MachineFunction &MF) const { +  return 0; +} + +/// Funclets only need to account for space for the callee saved registers, +/// as the locals are accounted for in the parent's stack frame. +unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( +    const MachineFunction &MF) const { +  // This is the size of the pushed CSRs. +  unsigned CSSize = +      MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize(); +  // This is the amount of stack a funclet needs to allocate. +  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), +                 getStackAlignment()); +}  | 
