summary | refs | log | tree | commit | diff
path: root/lib/Target/X86/X86FrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp 65
1 files changed, 42 insertions, 23 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a257ec41f75b..e207c343fac8 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
-// not apparent from the title - it resolves callframesetup/destroy
+// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
@@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
int64_t RCXShadowSlot = 0;
int64_t RDXShadowSlot = 0;
- // If inlining in the prolog, save RCX and RDX.
- // Future optimization: don't save or restore if not live in.
+ // If inlining in the prolog, save RCX and RDX.
if (InProlog) {
// Compute the offsets. We need to account for things already
// pushed onto the stack at this point: return address, frame
@@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
const bool HasFP = hasFP(MF);
- RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
- RDXShadowSlot = RCXShadowSlot + 8;
- // Emit the saves.
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RCXShadowSlot)
- .addReg(X86::RCX);
- addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
- RDXShadowSlot)
- .addReg(X86::RDX);
+
+ // Check if we need to spill RCX and/or RDX.
+ // Here we assume that no earlier prologue instruction changes RCX and/or
+ // RDX, so checking the block live-ins is enough.
+ const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
+ const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
+ int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ // Give each live-in register the initial slot, then move RDX's slot up by 8
+ // if both need to be spilled. A slot left at 0 marks a register not spilled.
+ if (IsRCXLiveIn)
+ RCXShadowSlot = InitSlot;
+ if (IsRDXLiveIn)
+ RDXShadowSlot = InitSlot;
+ if (IsRDXLiveIn && IsRCXLiveIn)
+ RDXShadowSlot += 8;
+ // Emit the saves if needed.
+ if (IsRCXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ if (IsRDXLiveIn)
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
} else {
// Not in the prolog. Copy RAX to a virtual reg.
BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
@@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
// Add code to roundMBB to round the final stack pointer to a page boundary.
+ RoundMBB->addLiveIn(FinalReg);
BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
.addReg(FinalReg)
.addImm(PageMask);
@@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addMBB(LoopMBB);
}
+ LoopMBB->addLiveIn(JoinReg);
addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
false, -PageSize);
@@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addImm(0)
.addReg(0)
.addImm(0);
+
+ LoopMBB->addLiveIn(RoundedReg);
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
@@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
// If in prolog, restore RDX and RCX.
if (InProlog) {
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RCX),
- X86::RSP, false, RCXShadowSlot);
- addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
- X86::RDX),
- X86::RSP, false, RDXShadowSlot);
+ if (RCXShadowSlot) // It means we spilled RCX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ if (RDXShadowSlot) // It means we spilled RDX in the prologue.
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
+ TII.get(X86::MOV64rm), X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
}
// Now that the probing is done, add code to continueMBB to update
// the stack pointer for real.
+ ContinueMBB->addLiveIn(SizeReg);
BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(SizeReg);
@@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
CMBBI->setFlag(MachineInstr::FrameSetup);
}
}
-
- // Possible TODO: physreg liveness for InProlog case.
}
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
@@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
Regs[FoundRegs++] = Regs[0];
for (int i = 0; i < NumPops; ++i)
- BuildMI(MBB, MBBI, DL,
+ BuildMI(MBB, MBBI, DL,
TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
return true;
@@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator {
// in general. Something to keep in mind, though.
if (DensityAScaled == DensityBScaled)
return A.ObjectAlignment < B.ObjectAlignment;
-
+
return DensityAScaled < DensityBScaled;
}
};
@@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects(
if (ObjectSize == 0)
// Variable size. Just use 4.
SortingObjects[Obj].ObjectSize = 4;
- else
+ else
SortingObjects[Obj].ObjectSize = ObjectSize;
}