diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FrameLowering.cpp')
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86FrameLowering.cpp | 65 | 
1 file changed, 42 insertions, 23 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index a257ec41f75b..e207c343fac8 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {  // needsFrameIndexResolution - Do we need to perform FI resolution for  // this function. Normally, this is required only when the function  // has any stack objects. However, FI resolution actually has another job, -// not apparent from the title - it resolves callframesetup/destroy  +// not apparent from the title - it resolves callframesetup/destroy  // that were not simplified earlier.  // So, this is required for x86 functions that have push sequences even  // when there are no stack objects. @@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,    int64_t RCXShadowSlot = 0;    int64_t RDXShadowSlot = 0; -  // If inlining in the prolog, save RCX and RDX.      -  // Future optimization: don't save or restore if not live in. +  // If inlining in the prolog, save RCX and RDX.    if (InProlog) {      // Compute the offsets. We need to account for things already      // pushed onto the stack at this point: return address, frame @@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();      const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();      const bool HasFP = hasFP(MF); -    RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); -    RDXShadowSlot = RCXShadowSlot + 8; -    // Emit the saves. 
-    addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, -                 RCXShadowSlot) -        .addReg(X86::RCX); -    addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, -                 RDXShadowSlot) -        .addReg(X86::RDX); + +    // Check if we need to spill RCX and/or RDX. +    // Here we assume that no earlier prologue instruction changes RCX and/or +    // RDX, so checking the block live-ins is enough. +    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); +    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); +    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); +    // Assign the initial slot to both registers, then change RDX's slot if both +    // need to be spilled. +    if (IsRCXLiveIn) +      RCXShadowSlot = InitSlot; +    if (IsRDXLiveIn) +      RDXShadowSlot = InitSlot; +    if (IsRDXLiveIn && IsRCXLiveIn) +      RDXShadowSlot += 8; +    // Emit the saves if needed. +    if (IsRCXLiveIn) +      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, +                   RCXShadowSlot) +          .addReg(X86::RCX); +    if (IsRDXLiveIn) +      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, +                   RDXShadowSlot) +          .addReg(X86::RDX);    } else {      // Not in the prolog. Copy RAX to a virtual reg.      BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); @@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,    BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);    // Add code to roundMBB to round the final stack pointer to a page boundary. 
+  RoundMBB->addLiveIn(FinalReg);    BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)        .addReg(FinalReg)        .addImm(PageMask); @@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,          .addMBB(LoopMBB);    } +  LoopMBB->addLiveIn(JoinReg);    addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,                 false, -PageSize); @@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,        .addImm(0)        .addReg(0)        .addImm(0); + +  LoopMBB->addLiveIn(RoundedReg);    BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))        .addReg(RoundedReg)        .addReg(ProbeReg); @@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,    // If in prolog, restore RDX and RCX.    if (InProlog) { -    addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), -                         X86::RCX), -                 X86::RSP, false, RCXShadowSlot); -    addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), -                         X86::RDX), -                 X86::RSP, false, RDXShadowSlot); +    if (RCXShadowSlot) // It means we spilled RCX in the prologue. +      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, +                           TII.get(X86::MOV64rm), X86::RCX), +                   X86::RSP, false, RCXShadowSlot); +    if (RDXShadowSlot) // It means we spilled RDX in the prologue. +      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, +                           TII.get(X86::MOV64rm), X86::RDX), +                   X86::RSP, false, RDXShadowSlot);    }    // Now that the probing is done, add code to continueMBB to update    // the stack pointer for real. 
+  ContinueMBB->addLiveIn(SizeReg);    BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)        .addReg(X86::RSP)        .addReg(SizeReg); @@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,        CMBBI->setFlag(MachineInstr::FrameSetup);      }    } - -  // Possible TODO: physreg liveness for InProlog case.  }  void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, @@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,      Regs[FoundRegs++] = Regs[0];    for (int i = 0; i < NumPops; ++i) -    BuildMI(MBB, MBBI, DL,  +    BuildMI(MBB, MBBI, DL,              TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);    return true; @@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator {      // in general. Something to keep in mind, though.      if (DensityAScaled == DensityBScaled)        return A.ObjectAlignment < B.ObjectAlignment; -     +      return DensityAScaled < DensityBScaled;    }  }; @@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects(      if (ObjectSize == 0)        // Variable size. Just use 4.        SortingObjects[Obj].ObjectSize = 4; -    else       +    else        SortingObjects[Obj].ObjectSize = ObjectSize;    }  | 
