diff options
Diffstat (limited to 'lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 65 |
1 files changed, 42 insertions, 23 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a257ec41f75b..e207c343fac8 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { // needsFrameIndexResolution - Do we need to perform FI resolution for // this function. Normally, this is required only when the function // has any stack objects. However, FI resolution actually has another job, -// not apparent from the title - it resolves callframesetup/destroy +// not apparent from the title - it resolves callframesetup/destroy // that were not simplified earlier. // So, this is required for x86 functions that have push sequences even // when there are no stack objects. @@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, int64_t RCXShadowSlot = 0; int64_t RDXShadowSlot = 0; - // If inlining in the prolog, save RCX and RDX. - // Future optimization: don't save or restore if not live in. + // If inlining in the prolog, save RCX and RDX. if (InProlog) { // Compute the offsets. We need to account for things already // pushed onto the stack at this point: return address, frame @@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); const bool HasFP = hasFP(MF); - RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); - RDXShadowSlot = RCXShadowSlot + 8; - // Emit the saves. - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RCXShadowSlot) - .addReg(X86::RCX); - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RDXShadowSlot) - .addReg(X86::RDX); + + // Check if we need to spill RCX and/or RDX. + // Here we assume that no earlier prologue instruction changes RCX and/or + // RDX, so checking the block live-ins is enough. + const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); + const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); + int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); + // Assign the initial slot to both registers, then change RDX's slot if both + // need to be spilled. + if (IsRCXLiveIn) + RCXShadowSlot = InitSlot; + if (IsRDXLiveIn) + RDXShadowSlot = InitSlot; + if (IsRDXLiveIn && IsRCXLiveIn) + RDXShadowSlot += 8; + // Emit the saves if needed. + if (IsRCXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RCXShadowSlot) + .addReg(X86::RCX); + if (IsRDXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RDXShadowSlot) + .addReg(X86::RDX); } else { // Not in the prolog. Copy RAX to a virtual reg. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); @@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB); // Add code to roundMBB to round the final stack pointer to a page boundary. + RoundMBB->addLiveIn(FinalReg); BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) .addReg(FinalReg) .addImm(PageMask); @@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, .addMBB(LoopMBB); } + LoopMBB->addLiveIn(JoinReg); addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, false, -PageSize); @@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, .addImm(0) .addReg(0) .addImm(0); + + LoopMBB->addLiveIn(RoundedReg); BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) .addReg(RoundedReg) .addReg(ProbeReg); @@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, // If in prolog, restore RDX and RCX. if (InProlog) { - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RCX), - X86::RSP, false, RCXShadowSlot); - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RDX), - X86::RSP, false, RDXShadowSlot); + if (RCXShadowSlot) // It means we spilled RCX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RCX), + X86::RSP, false, RCXShadowSlot); + if (RDXShadowSlot) // It means we spilled RDX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RDX), + X86::RSP, false, RDXShadowSlot); } // Now that the probing is done, add code to continueMBB to update // the stack pointer for real. + ContinueMBB->addLiveIn(SizeReg); BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(SizeReg); @@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, CMBBI->setFlag(MachineInstr::FrameSetup); } } - - // Possible TODO: physreg liveness for InProlog case. } void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, @@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, Regs[FoundRegs++] = Regs[0]; for (int i = 0; i < NumPops; ++i) - BuildMI(MBB, MBBI, DL, + BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); return true; @@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator { // in general. Something to keep in mind, though. if (DensityAScaled == DensityBScaled) return A.ObjectAlignment < B.ObjectAlignment; - + return DensityAScaled < DensityBScaled; } }; @@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects( if (ObjectSize == 0) // Variable size. Just use 4. SortingObjects[Obj].ObjectSize = 4; - else + else SortingObjects[Obj].ObjectSize = ObjectSize; } |