author     Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
commit     eb11fae6d08f479c0799db45860a98af528fa6e7
tree       44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/X86/X86FrameLowering.cpp
parent     b8a2042aa938069e862750553db0e4d82d25822c
Diffstat (limited to 'lib/Target/X86/X86FrameLowering.cpp')

 -rw-r--r--  lib/Target/X86/X86FrameLowering.cpp  239
 1 file changed, 162 insertions, 77 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 80b1cc192a88..a257ec41f75b 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -248,6 +248,7 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
 /// stack pointer by a constant value.
 void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
+                                    const DebugLoc &DL,
                                     int64_t NumBytes, bool InEpilogue) const {
   bool isSub = NumBytes < 0;
   uint64_t Offset = isSub ? -NumBytes : NumBytes;
@@ -255,7 +256,6 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
       isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
 
   uint64_t Chunk = (1LL << 31) - 1;
-  DebugLoc DL = MBB.findDebugLoc(MBBI);
 
   if (Offset > Chunk) {
     // Rather than emit a long series of instructions for large offsets,
@@ -399,28 +399,30 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
-  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
-                                                       : std::next(MBBI);
+
   PI = skipDebugInstructionsBackward(PI, MBB.begin());
-  if (NI != nullptr)
-    NI = skipDebugInstructionsForward(NI, MBB.end());
+  // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI
+  // instruction, and that there are no DBG_VALUE or other instructions between
+  // ADD/SUB/LEA and its corresponding CFI instruction.
+  /* TODO: Add support for the case where there are multiple CFI instructions
+    below the ADD/SUB/LEA, e.g.:
+    ...
+    add
+    cfi_def_cfa_offset
+    cfi_offset
+    ...
+  */
+  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
+    PI = std::prev(PI);
 
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
-  if (!doMergeWithPrevious && NI != MBB.end() &&
-      NI->getOpcode() == TargetOpcode::CFI_INSTRUCTION) {
-    // Don't merge with the next instruction if it has CFI.
-    return Offset;
-  }
-
   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
        Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
       PI->getOperand(0).getReg() == StackPtr){
     assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset += PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
+    Offset = PI->getOperand(2).getImm();
   } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
              PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
@@ -428,17 +430,19 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
              PI->getOperand(3).getReg() == X86::NoRegister &&
              PI->getOperand(5).getReg() == X86::NoRegister) {
     // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
-    Offset += PI->getOperand(4).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
+    Offset = PI->getOperand(4).getImm();
   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
              PI->getOperand(0).getReg() == StackPtr) {
     assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset -= PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
-  }
+    Offset = -PI->getOperand(2).getImm();
+  } else
+    return 0;
+
+  PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (!doMergeWithPrevious)
+    MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
   return Offset;
 }
@@ -741,6 +745,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                           bool InProlog) const {
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
+  // FIXME: Add retpoline support and remove this.
+  if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
+                       "code model and retpoline not yet implemented.");
+
   unsigned CallOp;
   if (Is64Bit)
     CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
@@ -993,7 +1002,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       Fn.arg_size() == 2) {
     StackSize += 8;
     MFI.setStackSize(StackSize);
-    emitSPUpdate(MBB, MBBI, -8, /*InEpilogue=*/false);
+    emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
   }
 
   // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
@@ -1208,30 +1217,34 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     bool isEAXAlive = isEAXLiveIn(MBB);
 
     if (isEAXAlive) {
-      // Sanity check that EAX is not livein for this function.
-      // It should not be, so throw an assert.
-      assert(!Is64Bit && "EAX is livein in x64 case!");
-
-      // Save EAX
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
-          .addReg(X86::EAX, RegState::Kill)
-          .setMIFlag(MachineInstr::FrameSetup);
+      if (Is64Bit) {
+        // Save RAX
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+            .addReg(X86::RAX, RegState::Kill)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        // Save EAX
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+            .addReg(X86::EAX, RegState::Kill)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
 
     if (Is64Bit) {
       // Handle the 64-bit Windows ABI case where we need to call __chkstk.
       // Function prologue is responsible for adjusting the stack pointer.
-      if (isUInt<32>(NumBytes)) {
+      int Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
+      if (isUInt<32>(Alloc)) {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-            .addImm(NumBytes)
+            .addImm(Alloc)
             .setMIFlag(MachineInstr::FrameSetup);
-      } else if (isInt<32>(NumBytes)) {
+      } else if (isInt<32>(Alloc)) {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
-            .addImm(NumBytes)
+            .addImm(Alloc)
             .setMIFlag(MachineInstr::FrameSetup);
       } else {
         BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
-            .addImm(NumBytes)
+            .addImm(Alloc)
             .setMIFlag(MachineInstr::FrameSetup);
       }
     } else {
@@ -1246,15 +1259,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     emitStackProbe(MF, MBB, MBBI, DL, true);
 
     if (isEAXAlive) {
-      // Restore EAX
-      MachineInstr *MI =
-          addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
-                       StackPtr, false, NumBytes - 4);
+      // Restore RAX/EAX
+      MachineInstr *MI;
+      if (Is64Bit)
+        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
+                          StackPtr, false, NumBytes - 8);
+      else
+        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
+                          StackPtr, false, NumBytes - 4);
       MI->setFlag(MachineInstr::FrameSetup);
       MBB.insert(MBBI, MI);
     }
   } else if (NumBytes) {
-    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
+    emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
   }
 
   if (NeedsWinCFI && NumBytes) {
@@ -1560,6 +1577,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   bool HasFP = hasFP(MF);
   uint64_t NumBytes = 0;
 
+  bool NeedsDwarfCFI =
+      (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+       !MF.getTarget().getTargetTriple().isOSWindows()) &&
+      (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -1582,6 +1604,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr =
+          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+                                  nullptr, DwarfStackPtr, -SlotSize));
+      --MBBI;
+    }
   }
 
   MachineBasicBlock::iterator FirstCSPop = MBBI;
@@ -1644,7 +1673,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     }
   } else if (NumBytes) {
     // Adjust stack pointer back: ESP += numbytes.
-    emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
+    emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
+    if (!hasFP(MF) && NeedsDwarfCFI) {
+      // Define the current CFA rule to use the provided offset.
+      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
+                                  nullptr, -CSSize - SlotSize));
+    }
     --MBBI;
   }
 
@@ -1657,6 +1691,23 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   if (NeedsWin64CFI && MF.hasWinCFI())
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
 
+  if (!hasFP(MF) && NeedsDwarfCFI) {
+    MBBI = FirstCSPop;
+    int64_t Offset = -CSSize - SlotSize;
+    // Mark callee-saved pop instruction.
+    // Define the current CFA rule to use the provided offset.
+    while (MBBI != MBB.end()) {
+      MachineBasicBlock::iterator PI = MBBI;
+      unsigned Opc = PI->getOpcode();
+      ++MBBI;
+      if (Opc == X86::POP32r || Opc == X86::POP64r) {
+        Offset += SlotSize;
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
+      }
+    }
+  }
+
   if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
     // Add the return addr area delta back since we are not tail calling.
     int Offset = -1 * X86FI->getTCReturnAddrDelta();
@@ -1664,7 +1715,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     if (Offset) {
       // Check for possible merge with preceding ADD instruction.
       Offset += mergeSPUpdates(MBB, Terminator, true);
-      emitSPUpdate(MBB, Terminator, Offset, /*InEpilogue=*/true);
+      emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
     }
   }
 }
@@ -1855,6 +1906,32 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
   unsigned CalleeSavedFrameSize = 0;
   int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
 
+  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+  if (TailCallReturnAddrDelta < 0) {
+    // create RETURNADDR area
+    //   arg
+    //   arg
+    //   RETADDR
+    //   { ...
+    //     RETADDR area
+    //     ...
+    //   }
+    //   [EBP]
+    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
+                          TailCallReturnAddrDelta - SlotSize, true);
+  }
+
+  // Spill the BasePtr if it's used.
+  if (this->TRI->hasBasePointer(MF)) {
+    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
+    if (MF.hasEHFunclets()) {
+      int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
+      X86FI->setHasSEHFramePtrSave(true);
+      X86FI->setSEHFramePtrSaveIndex(FI);
+    }
+  }
+
   if (hasFP(MF)) {
     // emitPrologue always spills frame register the first thing.
     SpillSlotOffset -= SlotSize;
@@ -1894,7 +1971,12 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     unsigned Size = TRI->getSpillSize(*RC);
     unsigned Align = TRI->getSpillAlignment(*RC);
     // ensure alignment
@@ -1961,9 +2043,15 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
     unsigned Reg = CSI[i-1].getReg();
     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
       continue;
+
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
 
     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                             TRI);
@@ -2037,7 +2125,12 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
         X86::GR32RegClass.contains(Reg))
       continue;
 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    // If this is k-register make sure we lookup via the largest legal type.
+    MVT VT = MVT::Other;
+    if (X86::VK16RegClass.contains(Reg))
+      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
   }
 
@@ -2060,35 +2153,12 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
 
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
-  if (TailCallReturnAddrDelta < 0) {
-    // create RETURNADDR area
-    //   arg
-    //   arg
-    //   RETADDR
-    //   { ...
-    //     RETADDR area
-    //     ...
-    //   }
-    //   [EBP]
-    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
-                          TailCallReturnAddrDelta - SlotSize, true);
-  }
-
   // Spill the BasePtr if it's used.
-  if (TRI->hasBasePointer(MF)) {
-    SavedRegs.set(TRI->getBaseRegister());
-
-    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
-    if (MF.hasEHFunclets()) {
-      int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
-      X86FI->setHasSEHFramePtrSave(true);
-      X86FI->setSEHFramePtrSaveIndex(FI);
-    }
+  if (TRI->hasBasePointer(MF)){
+    unsigned BasePtr = TRI->getBaseRegister();
+    if (STI.isTarget64BitILP32())
+      BasePtr = getX86SubSuperRegister(BasePtr, 64);
+    SavedRegs.set(BasePtr);
   }
 }
@@ -2171,8 +2241,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
   // prologue.
   StackSize = MFI.getStackSize();
 
-  // Do not generate a prologue for functions with a stack of size zero
-  if (StackSize == 0)
+  // Do not generate a prologue for leaf functions with a stack of size zero.
+  // For non-leaf functions we have to allow for the possibility that the
+  // call is to a non-split function, as in PR37807.
+  if (StackSize == 0 && !MFI.hasTailCall())
     return;
 
   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
@@ -2345,6 +2417,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
     // This solution is not perfect, as it assumes that the .rodata section
     // is laid out within 2^31 bytes of each function body, but this seems
     // to be sufficient for JIT.
+    // FIXME: Add retpoline support and remove the error here..
+    if (STI.useRetpoline())
+      report_fatal_error("Emitting morestack calls on 64-bit with the large "
+                         "code model and retpoline not yet implemented.");
     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
         .addReg(X86::RIP)
         .addImm(0)
@@ -2683,7 +2759,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
     // Add Amount to SP to destroy a frame, or subtract to setup.
     int64_t StackAdjustment = isDestroy ? Amount : -Amount;
-    int64_t CfaAdjustment = -StackAdjustment;
 
     if (StackAdjustment) {
       // Merge with any previous or following adjustment instruction. Note: the
@@ -2708,6 +2783,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // offset to be correct at each call site, while for debugging we want
     // it to be more precise.
 
+    int64_t CfaAdjustment = -StackAdjustment;
     // TODO: When not using precise CFA, we also need to adjust for the
     // InternalAmt here.
     if (CfaAdjustment) {
@@ -2838,6 +2914,15 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
   return MBBI;
 }
 
+int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+  return TRI->getSlotSize();
+}
+
+unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
+    const {
+  return TRI->getDwarfRegNum(StackPtr, true);
+}
+
 namespace {
 // Struct used by orderFrameObjects to help sort the stack objects.
 struct X86FrameSortingObject {
@@ -2942,7 +3027,7 @@ void X86FrameLowering::orderFrameObjects(
   // Count the number of uses for each object.
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
-      if (MI.isDebugValue())
+      if (MI.isDebugInstr())
        continue;
       for (const MachineOperand &MO : MI.operands()) {
         // Check to see if it's a local stack symbol.
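The mergeSPUpdates rewrite above is the subtlest part of the diff: a stack-pointer ADD/SUB/LEA may now be followed by its own CFI instruction, and both must be erased together when the adjustment is folded into a neighboring one. Below is a minimal standalone sketch of that control flow; Inst, Block, and mergePreviousSPUpdate are hypothetical stand-ins for LLVM's MachineInstr, MachineBasicBlock, and mergeSPUpdates, not LLVM API.

#include <cstdint>
#include <iterator>
#include <list>

// Hypothetical stand-ins, just enough to model the patched control flow.
enum class Kind { AddSP, SubSP, Cfi, Other };
struct Inst {
  Kind kind;
  int64_t imm = 0; // immediate operand of AddSP/SubSP
};
using Block = std::list<Inst>;

// Fold the SP adjustment immediately before It into a single returned offset,
// erasing the ADD/SUB and the single CFI instruction assumed to follow it
// (the same one-CFI assumption the patch records in its TODO comment).
int64_t mergePreviousSPUpdate(Block &MBB, Block::iterator It) {
  if (It == MBB.begin())
    return 0;
  auto PI = std::prev(It);
  // PI may point at the CFI emitted for the ADD/SUB; step back over it first.
  if (PI->kind == Kind::Cfi && PI != MBB.begin())
    PI = std::prev(PI);

  int64_t Offset;
  switch (PI->kind) {
  case Kind::AddSP: Offset = PI->imm; break;  // add rsp, imm
  case Kind::SubSP: Offset = -PI->imm; break; // sub rsp, imm
  default: return 0;                          // nothing mergeable
  }

  PI = MBB.erase(PI);                  // drop the ADD/SUB ...
  if (PI != MBB.end() && PI->kind == Kind::Cfi)
    MBB.erase(PI);                     // ... and its trailing CFI
  return Offset;
}

For a block ending "add rsp, 16; .cfi_def_cfa_offset 8; ret", calling this at the ret returns 16 and leaves only the ret, matching how the patched code removes both instructions before the caller re-emits one combined update and one combined CFI.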
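The __chkstk hunks change two numbers that must stay consistent: when (E/R)AX is live-in it is pushed first, so the probe allocates NumBytes - 8, and the pushed value is reloaded from [rsp + NumBytes - 8] afterwards. A worked sketch of that arithmetic follows; this is hypothetical illustration code, not LLVM API.

#include <cstdint>
#include <iostream>

// Win64 prologue with RAX live-in, as emitted by the patched code:
//   push rax                       ; RSP -= 8, RAX saved at old RSP - 8
//   mov  eax, NumBytes - 8         ; __chkstk probes/allocates this much
//   call __chkstk                  ; effectively sub rsp, rax
//   mov  rax, [rsp + NumBytes - 8] ; reload the saved RAX
// so the total RSP delta is still exactly NumBytes.
int main() {
  uint64_t NumBytes = 4096 + 128; // frame size the prologue must create
  bool isRAXAlive = true;

  uint64_t PushBytes = isRAXAlive ? 8 : 0;
  uint64_t Alloc = NumBytes - PushBytes; // "isEAXAlive ? NumBytes - 8 : NumBytes"
  uint64_t ReloadOffset = NumBytes - 8;  // where the pushed RAX now sits

  std::cout << "push adjusts RSP by " << PushBytes << "\n"
            << "__chkstk allocates  " << Alloc << "\n"
            << "total frame         " << PushBytes + Alloc << "\n"
            << "saved RAX at [rsp + " << ReloadOffset << "]\n";
}

After the allocation the saved register sits at the very top of the new frame, which is why the restore hunk uses NumBytes - 8 on 64-bit where the 32-bit path uses NumBytes - 4.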
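The epilogue changes keep DWARF unwind information correct between the callee-saved pops and the return when there is no frame pointer: each POP shrinks the distance from RSP to the CFA by one slot, so a fresh .cfi_def_cfa_offset must follow it. A runnable model of the bookkeeping in the new while-loop, assuming (as the patch appears to) that createDefCfaOffset negates the passed-in value when the directive is emitted:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t SlotSize = 8; // x86-64
  const int64_t CSSize = 24;  // say, three callee-saved registers
  std::vector<bool> isPop = {true, true, true}; // pop r15; pop r14; pop rbx

  // Matches "int64_t Offset = -CSSize - SlotSize;" in the patch: offsets are
  // tracked negative and printed negated, modeling the positive
  // .cfi_def_cfa_offset values the assembler ultimately sees.
  int64_t Offset = -CSSize - SlotSize;
  for (bool Pop : isPop) {
    if (!Pop)
      continue;
    Offset += SlotSize;
    std::cout << ".cfi_def_cfa_offset " << -Offset << "\n";
  }
  // Prints 24, 16, 8: after the last pop only the return address remains.
}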
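The three identical k-register hunks fix spill sizing for AVX-512 mask registers: looking up a register class from the register alone can select a class whose spill size is too small, so the lookup is keyed on the largest legal mask type instead, v64i1 when AVX512BW is available and v16i1 otherwise. A hypothetical distillation of the size selection, with Subtarget and maskSpillBits standing in for the real LLVM types:

#include <iostream>

struct Subtarget {
  bool HasBWI; // AVX512BW makes 64-bit mask (v64i1) operations legal
};

// Mirrors "VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;" from the diff:
// the spill width comes from the widest legal mask type.
unsigned maskSpillBits(const Subtarget &STI) { return STI.HasBWI ? 64 : 16; }

int main() {
  std::cout << "mask spill bytes without BWI: " << maskSpillBits({false}) / 8
            << "\n"
            << "mask spill bytes with BWI:    " << maskSpillBits({true}) / 8
            << "\n"; // 2 bytes vs. 8 bytes per spilled k-register
}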