Diffstat (limited to 'llvm/lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r--   llvm/lib/Target/X86/X86FrameLowering.cpp | 448
1 file changed, 336 insertions, 112 deletions
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 799c1f5d1285..c7ca6fb2a4fc 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -17,6 +17,7 @@
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +33,12 @@
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
 
+#define DEBUG_TYPE "x86-fl"
+
+STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
+STATISTIC(NumFrameExtraProbe,
+          "Number of extra stack probes generated in prologue");
+
 using namespace llvm;
 
 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
@@ -50,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
 
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
 }
 
 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -60,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
 bool
 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) ||
+         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
          TRI->hasBasePointer(MF);
 }
@@ -83,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
 bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
-          TRI->needsStackRealignment(MF) ||
-          MFI.hasVarSizedObjects() ||
+          TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          MFI.hasCopyImplyingStackAdjustment());
@@ -257,7 +266,20 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
 
   uint64_t Chunk = (1LL << 31) - 1;
 
-  if (Offset > Chunk) {
+  MachineFunction &MF = *MBB.getParent();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86TargetLowering &TLI = *STI.getTargetLowering();
+  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+
+  // It's ok to not take into account large chunks when probing, as the
+  // allocation is split in smaller chunks anyway.
+  if (EmitInlineStackProbe && !InEpilogue) {
+
+    // This pseudo-instruction is going to be expanded, potentially using a
+    // loop, by inlineStackProbe().
+    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
+    return;
+  } else if (Offset > Chunk) {
     // Rather than emit a long series of instructions for large offsets,
     // load the offset into a register and do one sub/add
     unsigned Reg = 0;
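Note: the reason a large allocation must touch every intervening page, rather than just moving the stack pointer once, is that OS guard-page mechanisms only trap the first access past the committed region. The sketch below is a rough, self-contained C++ analogue of what the expanded probe ultimately does at runtime; the function name and the 4 KiB page size are illustrative assumptions, not part of this patch.

```cpp
#include <cstdint>

// Hypothetical runtime analogue of an inline stack probe: commit 'size'
// bytes below 'sp' by touching one byte per page, top-down, so each guard
// page faults in order instead of being skipped over.
void probe_stack(char *sp, uint64_t size, uint64_t page = 4096) {
  for (uint64_t off = page; off <= size; off += page)
    *(volatile char *)(sp - off) = 0; // touch, forcing the page to be mapped
}
```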
@@ -381,8 +403,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
   } else {
     bool IsSub = Offset < 0;
     uint64_t AbsOffset = IsSub ? -Offset : Offset;
-    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
-                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+                               : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
     MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
              .addReg(StackPtr)
              .addImm(AbsOffset);
@@ -457,9 +479,32 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
       .addCFIIndex(CFIIndex);
 }
 
+/// Emits Dwarf Info specifying offsets of callee saved registers and
+/// frame pointer. This is called only when basic block sections are enabled.
+void X86FrameLowering::emitCalleeSavedFrameMoves(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+  MachineFunction &MF = *MBB.getParent();
+  if (!hasFP(MF)) {
+    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
+    return;
+  }
+  const MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  const unsigned FramePtr = TRI->getFrameRegister(MF);
+  const unsigned MachineFramePtr =
+      STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64))
+                               : FramePtr;
+  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
+  // Offset = space for return address + size of the frame pointer itself.
+  unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
+  BuildCFI(MBB, MBBI, DebugLoc{},
+           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
+  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
+}
+
 void X86FrameLowering::emitCalleeSavedFrameMoves(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    const DebugLoc &DL) const {
+    const DebugLoc &DL, bool IsPrologue) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
@@ -474,10 +519,15 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
            I = CSI.begin(), E = CSI.end(); I != E; ++I) {
     int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
     unsigned Reg = I->getReg();
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    BuildCFI(MBB, MBBI, DL,
-             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+    if (IsPrologue) {
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+    } else {
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::createRestore(nullptr, DwarfReg));
+    }
   }
 }
 
@@ -488,7 +538,8 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   if (STI.isTargetWindowsCoreCLR()) {
     if (InProlog) {
-      emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
+          .addImm(0 /* no explicit stack size */);
     } else {
       emitStackProbeInline(MF, MBB, MBBI, DL, false);
     }
@@ -499,26 +550,13 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
 
 void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                         MachineBasicBlock &PrologMBB) const {
-  const StringRef ChkStkStubSymbol = "__chkstk_stub";
-  MachineInstr *ChkStkStub = nullptr;
-
-  for (MachineInstr &MI : PrologMBB) {
-    if (MI.isCall() && MI.getOperand(0).isSymbol() &&
-        ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
-      ChkStkStub = &MI;
-      break;
-    }
-  }
-
-  if (ChkStkStub != nullptr) {
-    assert(!ChkStkStub->isBundled() &&
-           "Not expecting bundled instructions here");
-    MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
-    assert(std::prev(MBBI) == ChkStkStub &&
-           "MBBI expected after __chkstk_stub.");
-    DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
-    emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
-    ChkStkStub->eraseFromParent();
+  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
+  });
+  if (Where != PrologMBB.end()) {
+    DebugLoc DL = PrologMBB.findDebugLoc(Where);
+    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
+    Where->eraseFromParent();
   }
 }
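Note: the rewrite above replaces symbol-name matching on a `__chkstk_stub` call with a search for a dedicated pseudo opcode. A minimal sketch of that scan-and-expand pattern, with a placeholder where the real code emits the probe sequence (`PseudoOpc` and the function name are illustrative, not LLVM API):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Generic shape of a late pseudo expansion; PseudoOpc stands in for an
// opcode like X86::STACKALLOC_W_PROBING.
void expandPseudoIn(llvm::MachineBasicBlock &MBB, unsigned PseudoOpc) {
  auto It = llvm::find_if(MBB, [&](llvm::MachineInstr &MI) {
    return MI.getOpcode() == PseudoOpc; // find the placeholder
  });
  if (It == MBB.end())
    return;              // nothing to expand in this block
  // ... emit the real instruction sequence before It ...
  It->eraseFromParent(); // the placeholder must not survive to emission
}
```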
@@ -528,6 +566,167 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                             const DebugLoc &DL,
                                             bool InProlog) const {
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
+    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
+  else
+    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
+}
+
+void X86FrameLowering::emitStackProbeInlineGeneric(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+  MachineInstr &AllocWithProbe = *MBBI;
+  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
+
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86TargetLowering &TLI = *STI.getTargetLowering();
+  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
+         "different expansion expected for CoreCLR 64 bit");
+
+  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+  uint64_t ProbeChunk = StackProbeSize * 8;
+
+  // Synthesize a loop or unroll it, depending on the number of iterations.
+  if (Offset > ProbeChunk) {
+    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
+  } else {
+    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
+  }
+}
+
+void X86FrameLowering::emitStackProbeInlineGenericBlock(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+    uint64_t Offset) const {
+
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86TargetLowering &TLI = *STI.getTargetLowering();
+  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+  uint64_t CurrentOffset = 0;
+  // 0, thanks to the return address being saved on the stack.
+  uint64_t CurrentProbeOffset = 0;
+
+  // For the first N - 1 pages, just probe. I tried to take advantage of
+  // natural probes, but it implies much more logic and there were very few
+  // interesting natural probes to interleave.
+  while (CurrentOffset + StackProbeSize < Offset) {
+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+                           .addReg(StackPtr)
+                           .addImm(StackProbeSize)
+                           .setMIFlag(MachineInstr::FrameSetup);
+    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+                     .setMIFlag(MachineInstr::FrameSetup),
+                 StackPtr, false, 0)
+        .addImm(0)
+        .setMIFlag(MachineInstr::FrameSetup);
+    NumFrameExtraProbe++;
+    CurrentOffset += StackProbeSize;
+    CurrentProbeOffset += StackProbeSize;
+  }
+
+  uint64_t ChunkSize = Offset - CurrentOffset;
+  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+                         .addReg(StackPtr)
+                         .addImm(ChunkSize)
+                         .setMIFlag(MachineInstr::FrameSetup);
+  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+}
+
+void X86FrameLowering::emitStackProbeInlineGenericLoop(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+    uint64_t Offset) const {
+  assert(Offset && "null offset");
+
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86TargetLowering &TLI = *STI.getTargetLowering();
+  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
+  // Synthesize a loop
+  NumFrameLoopProbe++;
+  const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+
+  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+
+  MachineFunction::iterator MBBIter = ++MBB.getIterator();
+  MF.insert(MBBIter, testMBB);
+  MF.insert(MBBIter, tailMBB);
+
+  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+      .addReg(StackPtr)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  // save loop bound
+  {
+    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+        .addReg(FinalStackProbed)
+        .addImm(Offset / StackProbeSize * StackProbeSize)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // allocate a page
+  {
+    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+    BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(StackProbeSize)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // touch the page
+  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
+                   .setMIFlag(MachineInstr::FrameSetup),
+               StackPtr, false, 0)
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  // cmp with stack pointer bound
+  BuildMI(testMBB, DL,
+          TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+      .addReg(StackPtr)
+      .addReg(FinalStackProbed)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  // jump
+  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
+      .addMBB(testMBB)
+      .addImm(X86::COND_NE)
+      .setMIFlag(MachineInstr::FrameSetup);
+  testMBB->addSuccessor(testMBB);
+  testMBB->addSuccessor(tailMBB);
+
+  // BB management
+  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
+  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+  MBB.addSuccessor(testMBB);
+
+  // handle tail
+  unsigned TailOffset = Offset % StackProbeSize;
+  if (TailOffset) {
+    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
+    BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+        .addReg(StackPtr)
+        .addImm(TailOffset)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Update Live In information
+  recomputeLiveIns(*testMBB);
+  recomputeLiveIns(*tailMBB);
+}
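Note: as a rough C++ analogue of the control flow synthesized above (the function and the 4 KiB probe size are illustrative assumptions, not part of the patch), the loop probes whole pages against a precomputed bound held in a scratch register, and the unprobed remainder is allocated in the fall-through block:

```cpp
#include <cstdint>

// Runtime shape of emitStackProbeInlineGenericLoop's output.
void probe_loop(char *&sp, uint64_t offset, uint64_t page = 4096) {
  char *bound = sp - (offset / page) * page; // FinalStackProbed (R11)
  while (sp != bound) {                      // testMBB: sub, mov, cmp, jne
    sp -= page;
    *(volatile char *)sp = 0;                // touch the newly allocated page
  }
  sp -= offset % page;                       // tailMBB: remainder, no probe
}
```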
+
+void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   assert(STI.is64Bit() && "different expansion needed for 32 bit");
   assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
   const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -765,10 +964,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                           bool InProlog) const {
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
-  // FIXME: Add retpoline support and remove this.
-  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+  // FIXME: Add indirect thunk support and remove this.
+  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
-                       "code model and retpoline not yet implemented.");
+                       "code model and indirect thunks not yet implemented.");
 
   unsigned CallOp;
   if (Is64Bit)
@@ -821,16 +1020,6 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
   }
 }
 
-void X86FrameLowering::emitStackProbeInlineStub(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
-
-  assert(InProlog && "ChkStkStub called outside prolog!");
-
-  BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-      .addExternalSymbol("__chkstk_stub");
-}
-
 static unsigned calculateSetFPREG(uint64_t SPAdjust) {
   // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
   // and might require smaller successive adjustments.
@@ -846,15 +1035,15 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
 // go with the minimum SlotSize.
 uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment.
-  unsigned StackAlign = getStackAlignment();
+  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
+  Align StackAlign = getStackAlign();
   if (MF.getFunction().hasFnAttribute("stackrealign")) {
     if (MFI.hasCalls())
       MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
     else if (MaxAlign < SlotSize)
-      MaxAlign = SlotSize;
+      MaxAlign = Align(SlotSize);
   }
-  return MaxAlign;
+  return MaxAlign.value();
 }
 
 void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
@@ -1014,7 +1203,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     X86FI->setCalleeSavedFrameSize(
       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
 
-  bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
+  const bool EmitStackProbeCall =
+      STI.getTargetLowering()->hasStackProbeSymbol(MF);
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
@@ -1032,11 +1222,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone). We also check that we don't
   // push and pop from the stack.
-  if (has128ByteRedZone(MF) &&
-      !TRI->needsStackRealignment(MF) &&
+  if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
       !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
       !MFI.adjustsStack() &&                   // No calls.
-      !UseStackProbe &&                        // No stack probes.
+      !EmitStackProbeCall &&                   // No stack probes.
       !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
       !MF.shouldSplitStack()) {                // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -1115,7 +1304,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
       BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
+               MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth));
 
       // Change the rule for the FramePtr to be an "offset" rule.
       unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
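Note on the sign flips in the CFI hunks here and below: the deprecated createDefCfaOffset negated its argument internally, whereas cfiDefCfaOffset takes the CFA offset exactly as it should appear in the emitted .cfi_def_cfa_offset directive. Since stackGrowth is -SlotSize on x86, -2 * stackGrowth yields the positive 16 (on x86-64) expected after pushing the return address and the frame pointer. A minimal sketch of the new API under those assumptions (the helper function is hypothetical):

```cpp
#include "llvm/MC/MCDwarf.h"
#include <cassert>

// After 'push %rbp' in a 64-bit prologue, the CFA is 16 bytes above RSP.
void checkCfaOffsetAfterPush() {
  const int64_t SlotSize = 8;
  const int64_t stackGrowth = -SlotSize; // the stack grows down
  auto CFI = llvm::MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth);
  assert(CFI.getOffset() == 16); // emitted as .cfi_def_cfa_offset 16
}
```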
@@ -1192,7 +1381,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
       BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
+               MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset));
       StackOffset += stackGrowth;
     }
 
@@ -1237,7 +1426,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   uint64_t AlignedNumBytes = NumBytes;
   if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
     AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
-  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
+  if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
     assert(!X86FI->getUsesRedZone() &&
            "The Red Zone is not accounted for in stack probes");
 
@@ -1323,17 +1512,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
                    Establisher, false, PSPSlotOffset)
           .addMemOperand(MF.getMachineMemOperand(
-              NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
+              NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
       ;
       // Save the root establisher back into the current funclet's (mostly
       // empty) frame, in case a sub-funclet or the GC needs it.
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
                    false, PSPSlotOffset)
           .addReg(Establisher)
-          .addMemOperand(
-              MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
-                                                  MachineMemOperand::MOVolatile,
-                                      SlotSize, SlotSize));
+          .addMemOperand(MF.getMachineMemOperand(
+              NoInfo,
+              MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+              SlotSize, Align(SlotSize)));
     }
     SPOrEstablisher = Establisher;
   } else {
@@ -1370,7 +1559,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     // into the registration node so that the runtime will restore it for us.
     if (!MBB.isCleanupFuncletEntry()) {
       assert(Personality == EHPersonality::MSVC_CXX);
-      unsigned FrameReg;
+      Register FrameReg;
       int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
       int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
       // ESP is the first field, so no extra displacement is needed.
@@ -1389,7 +1578,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
         if (X86::FR64RegClass.contains(Reg)) {
           int Offset;
-          unsigned IgnoredFrameReg;
+          Register IgnoredFrameReg;
           if (IsWin64Prologue && IsFunclet)
             Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
           else
@@ -1423,7 +1612,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
         .addReg(StackPtr)
         .addMemOperand(MF.getMachineMemOperand(
             PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
-            SlotSize, SlotSize));
+            SlotSize, Align(SlotSize)));
   }
 
   // Realign stack after we spilled callee-saved registers (so that we'll be
@@ -1464,7 +1653,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     // it recovers the frame pointer from the base pointer rather than the
     // other way around.
     unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
-    unsigned UsedReg;
+    Register UsedReg;
     int Offset =
         getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
     assert(UsedReg == BasePtr);
@@ -1479,12 +1668,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   if (!HasFP && NumBytes) {
     // Define the current CFA rule to use the provided offset.
     assert(StackSize);
-    BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
-                                nullptr, -StackSize + stackGrowth));
+    BuildCFI(
+        MBB, MBBI, DL,
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth));
   }
 
   // Emit DWARF info specifying the offsets of the callee-saved registers.
-  emitCalleeSavedFrameMoves(MBB, MBBI, DL);
+  emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
 }
 
 // X86 Interrupt handling function cannot assume anything about the direction
@@ -1541,7 +1731,7 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
 unsigned
 X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
   const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
-  unsigned SPReg;
+  Register SPReg;
   int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
                                               /*IgnoreSPUpdates*/ true);
   assert(Offset >= 0 && SPReg == TRI->getStackRegister());
@@ -1573,7 +1763,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
   // RBP is not included in the callee saved register block. After pushing RBP,
   // everything is 16 byte aligned. Everything we allocate before an outgoing
   // call must also be 16 byte aligned.
-  unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
+  unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
   // Subtract out the size of the callee saved registers. This is how much stack
   // each funclet will allocate.
   return FrameSizeMinusRBP + XMMSize - CSSize;
@@ -1634,6 +1824,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   }
   uint64_t SEHStackAllocAmt = NumBytes;
 
+  // AfterPop is the position to insert .cfi_restore.
+  MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
     // Pop EBP.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
@@ -1642,8 +1834,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (NeedsDwarfCFI) {
       unsigned DwarfStackPtr =
           TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
-                                  nullptr, DwarfStackPtr, -SlotSize));
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize));
+      if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
+        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+        BuildCFI(MBB, AfterPop, DL,
+                 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr));
+        --MBBI;
+        --AfterPop;
+      }
       --MBBI;
     }
   }
@@ -1711,8 +1910,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
     if (!hasFP(MF) && NeedsDwarfCFI) {
       // Define the current CFA rule to use the provided offset.
-      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
-                                  nullptr, -CSSize - SlotSize));
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
     }
     --MBBI;
   }
@@ -1738,11 +1937,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
       if (Opc == X86::POP32r || Opc == X86::POP64r) {
         Offset += SlotSize;
         BuildCFI(MBB, MBBI, DL,
-                 MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
+                 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
       }
     }
   }
 
+  // Emit DWARF info specifying the restores of the callee-saved registers.
+  // For an epilogue that returns, or any other block without successors,
+  // there is no need to generate .cfi_restore for callee-saved registers.
+  if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) {
+    emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
+  }
+
   if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
     // Add the return addr area delta back since we are not tail calling.
     int Offset = -1 * X86FI->getTCReturnAddrDelta();
@@ -1756,7 +1962,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
 }
 
 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
-                                             unsigned &FrameReg) const {
+                                             Register &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
 
   bool IsFixed = MFI.isFixedObjectIndex(FI);
@@ -1821,7 +2027,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
       // Skip the saved EBP.
       return Offset + SlotSize + FPDelta;
     } else {
-      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
+      assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
       return Offset + StackSize;
     }
   } else if (TRI->needsStackRealignment(MF)) {
@@ -1829,7 +2035,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
       // Skip the saved EBP.
       return Offset + SlotSize + FPDelta;
     } else {
-      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
+      assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
       return Offset + StackSize;
     }
     // FIXME: Support tail calls
@@ -1849,8 +2055,8 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   return Offset + FPDelta;
 }
 
-int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
-                                              int FI, unsigned &FrameReg) const {
+int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
+                                              Register &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
@@ -1860,21 +2066,21 @@ int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
     return getFrameIndexReference(MF, FI, FrameReg);
 
   FrameReg = TRI->getStackRegister();
-  return alignDown(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
+  return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
+         it->second;
 }
 
 int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
-                                               int FI, unsigned &FrameReg,
+                                               int FI, Register &FrameReg,
                                                int Adjustment) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   FrameReg = TRI->getStackRegister();
   return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
 }
 
-int
-X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
-                                                 int FI, unsigned &FrameReg,
-                                                 bool IgnoreSPUpdates) const {
+int X86FrameLowering::getFrameIndexReferencePreferSP(
+    const MachineFunction &MF, int FI, Register &FrameReg,
+    bool IgnoreSPUpdates) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
 
   // Does not include any dynamic realign.
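Note: many hunks in this diff migrate raw unsigned alignments to the llvm::Align type, which encodes a guaranteed non-zero power of two and is accepted directly by alignTo and isAligned. A small illustrative sketch (not from the patch):

```cpp
#include "llvm/Support/Alignment.h"
#include <cassert>

void alignDemo() {
  llvm::Align A(16);                  // must be a power of two
  assert(llvm::alignTo(13, A) == 16); // round up to the next multiple
  assert(llvm::alignTo(32, A) == 32); // already aligned: unchanged
  assert(A.value() == 16);            // explicit escape hatch to an integer
}
```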
@@ -1985,7 +2191,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
   if (this->TRI->hasBasePointer(MF)) {
     // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
     if (MF.hasEHFunclets()) {
-      int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
+      int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
       X86FI->setHasSEHFramePtrSave(true);
       X86FI->setSEHFramePtrSaveIndex(FI);
     }
@@ -2038,16 +2244,16 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
 
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     unsigned Size = TRI->getSpillSize(*RC);
-    unsigned Align = TRI->getSpillAlignment(*RC);
+    Align Alignment = TRI->getSpillAlign(*RC);
     // ensure alignment
     assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
-    SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
+    SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
 
     // spill into slot
     SpillSlotOffset -= Size;
     int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
     CSI[i - 1].setFrameIdx(SlotIndex);
-    MFI.ensureMaxAlignment(Align);
+    MFI.ensureMaxAlignment(Alignment);
 
     // Save the start offset and size of XMM in stack frame for funclets.
     if (X86::VR128RegClass.contains(Reg)) {
@@ -2061,8 +2267,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
 
 bool X86FrameLowering::spillCalleeSavedRegisters(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-    const std::vector<CalleeSavedInfo> &CSI,
-    const TargetRegisterInfo *TRI) const {
+    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
@@ -2161,10 +2366,9 @@ void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
   CatchRetTarget->setHasAddressTaken();
 }
 
-bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                              MachineBasicBlock::iterator MI,
-                                          std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
+bool X86FrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -2493,9 +2697,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
     // is laid out within 2^31 bytes of each function body, but this seems
     // to be sufficient for JIT.
     // FIXME: Add retpoline support and remove the error here..
-    if (STI.useRetpolineIndirectCalls())
+    if (STI.useIndirectThunkCalls())
       report_fatal_error("Emitting morestack calls on 64-bit with the large "
-                         "code model and retpoline not yet implemented.");
+                         "code model and thunks not yet implemented.");
     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
         .addReg(X86::RIP)
         .addImm(0)
@@ -2799,6 +3003,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   I = MBB.erase(I);
   auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
 
+  // Try to avoid emitting dead SP adjustments if the block end is unreachable,
+  // typically because the function is marked noreturn (abort, throw,
+  // assert_fail, etc).
+  if (isDestroy && blockEndIsUnreachable(MBB, I))
+    return I;
+
   if (!reserveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
@@ -2807,8 +3017,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
     // alignment boundary.
-    unsigned StackAlign = getStackAlignment();
-    Amount = alignTo(Amount, StackAlign);
+    Amount = alignTo(Amount, getStackAlign());
 
     const Function &F = MF.getFunction();
     bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -2881,13 +3090,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     return I;
   }
 
-  if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) {
-    // If we are performing frame pointer elimination and if the callee pops
-    // something off the stack pointer, add it back.  We do this until we have
-    // more advanced stack pointer tracking ability.
-    // We are not tracking the stack pointer adjustment by the callee, so make
-    // sure we restore the stack pointer immediately after the call, there may
-    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+  if (InternalAmt) {
     MachineBasicBlock::iterator CI = I;
     MachineBasicBlock::iterator B = MBB.begin();
     while (CI != B && !std::prev(CI)->isCall())
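Note: the blockEndIsUnreachable early return added above targets call sites that never return, where the post-call stack-pointer readjustment would be dead. A hypothetical source-level example of the situation it covers:

```cpp
// With a callee-pop convention, the caller normally re-adjusts the stack
// pointer after the call; when control never comes back, that adjustment
// is dead code and can be skipped.
extern "C" [[noreturn]] void fatal(const char *msg);

int checked_div(int a, int b) {
  if (b == 0)
    fatal("division by zero"); // block end is unreachable: no SP cleanup
  return a / b;
}
```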
@@ -2964,7 +3167,7 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  unsigned UsedReg;
+  Register UsedReg;
   int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
   int EndOffset = -EHRegOffset - EHRegSize;
   FuncInfo.EHRegNodeEndOffset = EndOffset;
@@ -3003,8 +3206,8 @@ int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
   return TRI->getSlotSize();
 }
 
-unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
-    const {
+Register
+X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
   return TRI->getDwarfRegNum(StackPtr, true);
 }
 
@@ -3014,7 +3217,7 @@ struct X86FrameSortingObject {
   bool IsValid = false;             // true if we care about this Object.
   unsigned ObjectIndex = 0;         // Index of Object into MFI list.
   unsigned ObjectSize = 0;          // Size of Object in bytes.
-  unsigned ObjectAlignment = 1;     // Alignment of Object in bytes.
+  Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
   unsigned ObjectNumUses = 0;       // Object static number of uses.
 };
 
@@ -3099,7 +3302,7 @@ void X86FrameLowering::orderFrameObjects(
   for (auto &Obj : ObjectsToAllocate) {
     SortingObjects[Obj].IsValid = true;
     SortingObjects[Obj].ObjectIndex = Obj;
-    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj);
+    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
     // Set the size.
     int ObjectSize = MFI.getObjectSize(Obj);
     if (ObjectSize == 0)
@@ -3192,7 +3395,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
       int FrameIndex = H.CatchObj.FrameIndex;
       if (FrameIndex != INT_MAX) {
         // Ensure alignment.
-        unsigned Align = MFI.getObjectAlignment(FrameIndex);
+        unsigned Align = MFI.getObjectAlign(FrameIndex).value();
         MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
         MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
         MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
@@ -3219,3 +3422,24 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
                     UnwindHelpFI)
       .addImm(-2);
 }
+
+void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
+    MachineFunction &MF, RegScavenger *RS) const {
+  if (STI.is32Bit() && MF.hasEHFunclets())
+    restoreWinEHStackPointersInParent(MF);
+}
+
+void X86FrameLowering::restoreWinEHStackPointersInParent(
+    MachineFunction &MF) const {
+  // 32-bit functions have to restore stack pointers when control is
+  // transferred back to the parent function. These blocks are identified as
+  // eh pads that are not funclet entries.
+  bool IsSEH = isAsynchronousEHPersonality(
+      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
+  for (MachineBasicBlock &MBB : MF) {
+    bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
+    if (NeedsRestore)
+      restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
+                                  /*RestoreSP=*/IsSEH);
+  }
+}