diff options
Diffstat (limited to 'lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 585 |
1 files changed, 382 insertions, 203 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a06ba9d750a9..8c029a8c22d5 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -45,12 +46,12 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - const TargetRegisterInfo *RegInfo = TM.getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || + MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn()); } @@ -107,8 +108,10 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, unsigned Opc = MBBI->getOpcode(); switch (Opc) { default: return 0; - case X86::RET: - case X86::RETI: + case X86::RETL: + case X86::RETQ: + case X86::RETIL: + case X86::RETIQ: case X86::TCRETURNdi: case X86::TCRETURNri: case X86::TCRETURNmi: @@ -180,7 +183,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } - MachineInstr *MI = NULL; + MachineInstr *MI = nullptr; if (UseLEA) { MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), @@ -202,10 +205,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. static void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { + unsigned StackPtr, uint64_t *NumBytes = nullptr) { if (MBBI == MBB.begin()) return; - MachineBasicBlock::iterator PI = prior(MBBI); + MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || @@ -223,17 +226,18 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower +/// iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { + unsigned StackPtr, uint64_t *NumBytes = nullptr) { // FIXME: THIS ISN'T RUN!!! return; if (MBBI == MBB.end()) return; - MachineBasicBlock::iterator NI = llvm::next(MBBI); + MachineBasicBlock::iterator NI = std::next(MBBI); if (NI == MBB.end()) return; unsigned Opc = NI->getOpcode(); @@ -255,19 +259,19 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } /// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the -/// stack adjustment is returned as a positive value for ADD/LEA and a negative for -/// SUB. +/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and +/// the stack adjustment is returned as a positive value for ADD/LEA and a +/// negative for SUB. static int mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, - bool doMergeWithPrevious) { + MachineBasicBlock::iterator &MBBI, unsigned StackPtr, + bool doMergeWithPrevious) { if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) return 0; - MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); + MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; + MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr + : std::next(MBBI); unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -302,66 +306,32 @@ static bool isEAXLiveIn(MachineFunction &MF) { return false; } -void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { +void +X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -RegInfo->getSlotSize(); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minimum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. - if (HasFP && FramePtr == Reg) - continue; unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, Offset)); + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } @@ -373,8 +343,9 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, static bool usesTheStack(const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), - re = MRI.reg_end(); ri != re; ++ri) + for (MachineRegisterInfo::reg_instr_iterator + ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); + ri != re; ++ri) if (ri->isCopy()) return true; @@ -385,23 +356,107 @@ static bool usesTheStack(const MachineFunction &MF) { /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to /// generate the exception handling frames. + +/* + Here's a gist of what gets emitted: + + ; Establish frame pointer, if needed + [if needs FP] + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + .seh_pushreg %rpb + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + + ; Spill general-purpose registers + [for all callee-saved GPRs] + pushq %<reg> + [if not needs FP] + .cfi_def_cfa_offset (offset from RETADDR) + .seh_pushreg %<reg> + + ; If the required stack alignment > default stack alignment + ; rsp needs to be re-aligned. This creates a "re-alignment gap" + ; of unknown size in the stack frame. + [if stack needs re-alignment] + and $MASK, %rsp + + ; Allocate space for locals + [if target is Windows and allocated space > 4096 bytes] + ; Windows needs special care for allocations larger + ; than one page. + mov $NNN, %rax + call ___chkstk_ms/___chkstk + sub %rax, %rsp + [else] + sub $NNN, %rsp + + [if needs FP] + .seh_stackalloc (size of XMM spill slots) + .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots + [else] + .seh_stackalloc NNN + + ; Spill XMMs + ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, + ; they may get spilled on any platform, if the current function + ; calls @llvm.eh.unwind.init + [if needs FP] + [for all callee-saved XMM registers] + movaps %<xmm reg>, -MMM(%rbp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) + ; i.e. the offset relative to (%rbp - SEHFrameOffset) + [else] + [for all callee-saved XMM registers] + movaps %<xmm reg>, KKK(%rsp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, KKK + + .seh_endprologue + + [if needs base pointer] + mov %rsp, %rbx + + ; Emit CFI info + [if needs FP] + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rbp) + [else] + .cfi_def_cfa_offset (offset from RETADDR) + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rsp) + + Notes: + - .seh directives are emitted only for Windows 64 ABI + - .cfi directives are emitted for all other ABIs + - for 32-bit code, substitute %e?? registers for %r?? +*/ + void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || - Fn->needsUnwindTableEntry(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); + bool IsWinEH = + MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64. + bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); + bool NeedsDwarfCFI = + !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); @@ -439,7 +494,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone !usesTheStack(MF) && // Don't push and pop. - !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack + !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -499,21 +554,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(FramePtr, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark the place where EBP/RBP was saved. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) - .addSym(FrameLabel); - // Define the current CFA rule to use the provided offset. assert(StackSize); - MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, 2 * stackGrowth)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); // Change the rule for the FramePtr to be an "offset" rule. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(FrameLabel, DwarfFramePtr, - 2 * stackGrowth)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, + DwarfFramePtr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); } // Update EBP with the new base value. @@ -522,21 +584,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) - .addSym(FrameLabel); - // Define the current CFA to use the EBP/RBP register. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); - MMI.addFrameInst( - MCCFIInstruction::createDefCfaRegister(FrameLabel, DwarfFramePtr)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) + // Mark the FramePtr as live-in in every block. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); @@ -550,30 +609,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - MBBI->setFlag(MachineInstr::FrameSetup); + unsigned Reg = MBBI->getOperand(0).getReg(); ++MBBI; - if (!HasFP && needsFrameMoves) { + if (!HasFP && NeedsDwarfCFI) { // Mark callee-saved push instruction. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); - // Define the current CFA rule to use the provided offset. assert(StackSize); - MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(Label, StackOffset)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); StackOffset += stackGrowth; } + + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( + MachineInstr::FrameSetup); + } } // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - - // NOTE: We push the registers before realigning the stack, so - // vector callee-saved (xmm) registers may be saved w/o proper - // alignment in this way. However, currently these regs are saved in - // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so - // this shouldn't be a problem. if (RegInfo->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); MachineInstr *MI = @@ -606,16 +663,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetEnvMacho()) { + if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetMacho()) { const char *StackProbeSymbol; - bool isSPUpdateNeeded = false; if (Is64Bit) { - if (STI.isTargetCygMing()) - StackProbeSymbol = "___chkstk"; - else { + if (STI.isTargetCygMing()) { + StackProbeSymbol = "___chkstk_ms"; + } else { StackProbeSymbol = "__chkstk"; - isSPUpdateNeeded = true; } } else if (STI.isTargetCygMing()) StackProbeSymbol = "_alloca"; @@ -657,15 +712,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) .setMIFlag(MachineInstr::FrameSetup); - // MSVC x64's __chkstk does not adjust %rsp itself. - // It also does not clobber %rax so we can reuse it when adjusting %rsp. - if (isSPUpdateNeeded) { + if (Is64Bit) { + // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp + // themself. It also does not clobber %rax so we can reuse it when + // adjusting %rsp. BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) .addReg(StackPtr) .addReg(X86::RAX) .setMIFlag(MachineInstr::FrameSetup); } - if (isEAXAlive) { // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), @@ -674,38 +729,103 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } - } else if (NumBytes) + } else if (NumBytes) { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); + } + + int SEHFrameOffset = 0; + if (NeedsWinEH) { + if (HasFP) { + // We need to set frame base offset low enough such that all saved + // register offsets would be positive relative to it, but we can't + // just use NumBytes, because .seh_setframe offset must be <=240. + // So we pretend to have only allocated enough space to spill the + // non-volatile registers. + // We don't care about the rest of stack allocation, because unwinder + // will restore SP to (BP - SEHFrameOffset) + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + int offset = MFI->getObjectOffset(Info.getFrameIdx()); + SEHFrameOffset = std::max(SEHFrameOffset, abs(offset)); + } + SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant + + // This only needs to account for XMM spill slots, GPR slots + // are covered by the .seh_pushreg's emitted above. + unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize(); + if (Size) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(Size) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(SEHFrameOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // SP will be the base register for restoring XMMs + if (NumBytes) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } + + // Skip the rest of register spilling code + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + // Emit SEH info for non-GPRs + if (NeedsWinEH) { + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + unsigned Reg = Info.getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); + + int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + .setMIFlag(MachineInstr::FrameSetup); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. + // Update the base pointer with the current stack pointer. unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { + if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) - .addSym(Label); - if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. assert(StackSize); - MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset( - Label, -StackSize + stackGrowth)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, + -StackSize + stackGrowth)); + + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } // Emit DWARF info specifying the offsets of the callee-saved registers. if (PushedRegs) - emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); + emitCalleeSavedFrameMoves(MBB, MBBI, DL); } } @@ -713,12 +833,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI != MBB.end() && "Returning block has no instructions"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); bool IsLP64 = STI.isTarget64BitLP64(); bool UseLEA = STI.useLeaForSP(); @@ -730,8 +852,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, switch (RetOpcode) { default: llvm_unreachable("Can only insert epilog into returning blocks"); - case X86::RET: - case X86::RETI: + case X86::RETQ: + case X86::RETL: + case X86::RETIL: + case X86::RETIQ: case X86::TCRETURNdi: case X86::TCRETURNri: case X86::TCRETURNmi: @@ -781,7 +905,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); + MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && @@ -883,12 +1007,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, addReg(JumpTarget.getReg(), RegState::Kill); } - MachineInstr *NewMI = prior(MBBI); + MachineInstr *NewMI = std::prev(MBBI); NewMI->copyImplicitOps(MF, MBBI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); - } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && + } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL || + RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) && (X86FI->getTCReturnAddrDelta() < 0)) { // Add the return addr area delta back since we are not tail calling. int delta = -1*X86FI->getTCReturnAddrDelta(); @@ -901,7 +1026,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { +int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -959,46 +1085,97 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return getFrameIndexOffset(MF, FI); } -bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +bool X86FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + unsigned SlotSize = RegInfo->getSlotSize(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - DebugLoc DL = MBB.findDebugLoc(MI); + unsigned CalleeSavedFrameSize = 0; + int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); - MachineFunction &MF = *MBB.getParent(); + if (hasFP(MF)) { + // emitPrologue always spills frame register the first thing. + SpillSlotOffset -= SlotSize; + MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + + // Since emitPrologue and emitEpilogue will handle spilling and restoring of + // the frame register, we can delete it from CSI list and not have to worry + // about avoiding it later. + unsigned FPReg = RegInfo->getFrameRegister(MF); + for (unsigned i = 0; i < CSI.size(); ++i) { + if (CSI[i].getReg() == FPReg) { + CSI.erase(CSI.begin() + i); + break; + } + } + } - unsigned SlotSize = STI.is64Bit() ? 8 : 4; - unsigned FPReg = TRI->getFrameRegister(MF); - unsigned CalleeFrameSize = 0; + // Assign slots for GPRs. It increases frame size. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) + continue; + + SpillSlotOffset -= SlotSize; + CalleeSavedFrameSize += SlotSize; + + int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + } + + X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); + + // Assign slots for XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + // ensure alignment + SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment(); + // spill into slot + SpillSlotOffset -= RC->getSize(); + int SlotIndex = + MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + MFI->ensureMaxAlignment(RC->getAlignment()); + } + + return true; +} + +bool X86FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - if (!X86::GR64RegClass.contains(Reg) && - !X86::GR32RegClass.contains(Reg)) + unsigned Reg = CSI[i - 1].getReg(); + + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - // Make XMM regs spilled. X86 does not have ability of push/pop XMM. // It can be done by spilling XMMs to stack frame. - // Note that only Win64 ABI might spill XMMs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (X86::GR64RegClass.contains(Reg) || @@ -1007,8 +1184,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, TRI); + + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, + TRI); + --MI; + MI->setFlag(MachineInstr::FrameSetup); + ++MI; } return true; @@ -1025,6 +1206,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); // Reload XMMs from stack frame. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { @@ -1032,22 +1214,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); } // POP GPRs. - unsigned FPReg = TRI->getFrameRegister(MF); unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; @@ -1055,9 +1234,10 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, void X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); unsigned SlotSize = RegInfo->getSlotSize(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); @@ -1077,22 +1257,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, TailCallReturnAddrDelta - SlotSize, true); } - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - (void)FrameIdx; - } - // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); @@ -1150,8 +1314,9 @@ void X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MachineBasicBlock &prologueMBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); uint64_t StackSize; + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); unsigned TlsReg, TlsOffset; DebugLoc DL; @@ -1163,9 +1328,18 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!STI.isTargetLinux() && !STI.isTargetDarwin() && - !STI.isTargetWin32() && !STI.isTargetFreeBSD()) + !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. + StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); @@ -1190,11 +1364,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(allocMBB); MF.push_front(checkMBB); - // Eventually StackSize will be calculated by a link-time pass; which will - // also decide whether checking code needs to be injected into this particular - // prologue. - StackSize = MFI->getStackSize(); - // When the frame size is less than 256 we just compare the stack // boundary directly to the value of the stack pointer, per gcc. bool CompareStackPointer = StackSize < kSplitStackAvailable; @@ -1207,6 +1376,9 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. + } else if (STI.isTargetWin64()) { + TlsReg = X86::GS; + TlsOffset = 0x28; // pvArbitrary, reserved for application use } else if (STI.isTargetFreeBSD()) { TlsReg = X86::FS; TlsOffset = 0x18; @@ -1244,27 +1416,28 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) .addImm(1).addReg(0).addImm(-StackSize).addReg(0); - if (STI.isTargetLinux() || STI.isTargetWin32()) { + if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64()) { BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else if (STI.isTargetDarwin()) { - // TlsOffset doesn't fit into a mod r/m byte so we need an extra register + // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. unsigned ScratchReg2; bool SaveScratch2; if (CompareStackPointer) { - // The primary scratch register is available for holding the TLS offset + // The primary scratch register is available for holding the TLS offset. ScratchReg2 = GetScratchRegister(Is64Bit, MF, true); SaveScratch2 = false; } else { // Need to use a second register to hold the TLS offset ScratchReg2 = GetScratchRegister(Is64Bit, MF, false); - // Unfortunately, with fastcc the second scratch register may hold an arg + // Unfortunately, with fastcc the second scratch register may hold an + // argument. SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); } - // If Scratch2 is live-in then it needs to be saved + // If Scratch2 is live-in then it needs to be saved. assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && "Scratch register is live-in and not saved"); @@ -1341,18 +1514,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) /// /// CheckStack: -/// temp0 = sp - MaxStack -/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart /// OldStart: -/// ... +/// ... /// IncStack: -/// call inc_stack # doubles the stack space -/// temp0 = sp - MaxStack -/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// call inc_stack # doubles the stack space +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { - const X86InstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); + const unsigned SlotSize = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()) + ->getSlotSize(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); const bool Is64Bit = STI.is64Bit(); DebugLoc DL; // HiPE-specific values @@ -1481,12 +1657,14 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { void X86FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const X86InstrInfo &TII = *TM.getInstrInfo(); - const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const X86RegisterInfo &RegInfo = + *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); unsigned StackPtr = RegInfo.getStackRegister(); bool reseveCallFrame = hasReservedCallFrame(MF); int Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool IsLP64 = STI.isTarget64BitLP64(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; @@ -1504,10 +1682,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + MF.getTarget().getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - MachineInstr *New = 0; + MachineInstr *New = nullptr; if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr) @@ -1552,7 +1731,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->isCall()) + while (I != B && !std::prev(I)->isCall()) --I; MBB.insert(I, New); } |